From 9ff2bf5cc4eb2c4f6a09cc0d7d69c58a3a1ebaf3 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 14:36:14 +0100
Subject: [PATCH 01/18] Update docs

---
 docs/source/reference/constants.rst |  3 ++
 docs/source/tutorial/byod.rst       | 59 ++++++++++++-----------------
 src/pykeen/datasets/__init__.py     |  9 ++++-
 src/pykeen/datasets/base.py         | 20 +++++-----
 src/pykeen/typing.py                |  4 ++
 5 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/docs/source/reference/constants.rst b/docs/source/reference/constants.rst
index 678d6016ec..3ca0facdba 100644
--- a/docs/source/reference/constants.rst
+++ b/docs/source/reference/constants.rst
@@ -2,3 +2,6 @@ Constants
 =========
 .. automodule:: pykeen.constants
     :members:
+
+.. automodule:: pykeen.typing
+    :members:
diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst
index 0947f8ab89..1d03387e47 100644
--- a/docs/source/tutorial/byod.rst
+++ b/docs/source/tutorial/byod.rst
@@ -9,20 +9,16 @@ You've got a training and testing file as 3-column TSV files, all ready to go. Y
 any entities or relations appearing in the testing set that don't appear in the training set. Load them in the
 pipeline like this:
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.pipeline import pipeline
-
-    training_path: str = ...
-    testing_path: str = ...
-
-    result = pipeline(
-        training_triples_factory=training_path,
-        testing_triples_factory=testing_path,
-        model='TransE',
-    )
-    result.save_to_directory('test_pre_stratified_transe')
+>>> import pystow
+>>> from pykeen.triples import TriplesFactory
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> result = pipeline(
+...     training=NATIONS_TRAIN_PATH,
+...     testing=NATIONS_TEST_PATH,
+...     model='TransE',
+... )
+>>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_pre_stratified_transe'))
 
 PyKEEN will take care of making sure that the entities are mapped from their labels to appropriate integer
 (technically, 0-dimensional :class:`torch.LongTensor`) indexes and that the different sets of triples
@@ -35,14 +31,12 @@ the :func:`pykeen.pipeline.pipeline` as in:
 
     from pykeen.triples import TriplesFactory
     from pykeen.hpo import hpo_pipeline
-
-    training_path: str = ...
-    testing_path: str = ...
+    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
 
     result = hpo_pipeline(
         n_trials=30,
-        training_triples_factory=training_path,
-        testing_triples_factory=testing_path,
+        training=NATIONS_TRAIN_PATH,
+        testing=NATIONS_TEST_PATH,
         model='TransE',
     )
     result.save_to_directory('test_hpo_pre_stratified_transe')
@@ -57,13 +51,11 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse
 
     from pykeen.triples import TriplesFactory
     from pykeen.pipeline import pipeline
-
-    training_path: str = ...
-    testing_path: str = ...
+    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
 
     result = pipeline(
-        training_triples_factory=training_path,
-        testing_triples_factory=testing_path,
+        training=NATIONS_TRAIN_PATH,
+        testing=NATIONS_TEST_PATH,
         dataset_kwargs={'create_inverse_triples': True},
         model='TransE',
     )
@@ -76,13 +68,11 @@ TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface.
 
     from pykeen.triples import TriplesFactory
     from pykeen.pipeline import pipeline
+    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
 
-    training_path: str = ...
-    testing_path: str = ...
-
-    training = TriplesFactory(path=training_path)
+    training = TriplesFactory(path=NATIONS_TRAIN_PATH)
     testing = TriplesFactory(
-        path=testing_path,
+        path=NATIONS_TEST_PATH,
         entity_to_id=training.entity_to_id,
         relation_to_id=training.relation_to_id,
     )
@@ -110,16 +100,14 @@ desired behavior as in:
 
     from pykeen.triples import TriplesFactory
     from pykeen.pipeline import pipeline
-
-    training_path: str = ...
-    testing_path: str = ...
+    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
 
     training = TriplesFactory(
-        path=training_path,
+        path=NATIONS_TRAIN_PATH,
         create_inverse_triples=True,
     )
     testing = TriplesFactory(
-        path=testing_path,
+        path=NATIONS_TEST_PATH,
         entity_to_id=training.entity_to_id,
         relation_to_id=training.relation_to_id,
         create_inverse_triples=True,
@@ -145,8 +133,9 @@ a stratified dataset.
 
     from pykeen.triples import TriplesFactory
     from pykeen.pipeline import pipeline
+    from pykeen.datasets.nations import NATIONS_TRAIN_PATH
 
-    tf = TriplesFactory(path=...)
+    tf = TriplesFactory(path=NATIONS_TRAIN_PATH)
     training, testing = tf.split()
 
     result = pipeline(
diff --git a/src/pykeen/datasets/__init__.py b/src/pykeen/datasets/__init__.py
index 842374448b..5019000c68 100644
--- a/src/pykeen/datasets/__init__.py
+++ b/src/pykeen/datasets/__init__.py
@@ -125,7 +125,7 @@ def get_dataset(
         raise TypeError(f'Dataset is invalid type: {type(dataset)}')
 
     if isinstance(training, str) and isinstance(testing, str):
-        if isinstance(validation, str):
+        if validation is None or isinstance(validation, str):
             return PathDataset(
                 training_path=training,
                 testing_path=testing,
@@ -146,7 +146,12 @@ def get_dataset(
             validation=validation,
         )
 
-    raise TypeError('Training and testing must both be given as strings or Triples Factories')
+    raise TypeError(
+        f'''Training and testing must both be given as strings or Triples Factories.
+        - Training: {type(training)}: {training}
+        - Testing: {type(testing)}: {testing}
+        ''',
+    )
 
 
 def has_dataset(key: str) -> bool:
diff --git a/src/pykeen/datasets/base.py b/src/pykeen/datasets/base.py
index d71cdaa9c1..61080fd93c 100644
--- a/src/pykeen/datasets/base.py
+++ b/src/pykeen/datasets/base.py
@@ -183,7 +183,6 @@ def validation(self) -> TriplesFactory:  # type:ignore # noqa: D401
             self._load()
         if not self._loaded_validation:
             self._load_validation()
-        assert self._validation is not None
         return self._validation
 
     @property
@@ -269,14 +268,17 @@ def _load_validation(self) -> None:
         # don't call this function by itself. assumes called through the `validation`
         # property and the _training factory has already been loaded
         assert self._training is not None
-        self._validation = TriplesFactory.from_path(
-            path=self.validation_path,
-            entity_to_id=self._training.entity_to_id,  # share entity index with training
-            relation_to_id=self._training.relation_to_id,  # share relation index with training
-            # do not explicitly create inverse triples for testing; this is handled by the evaluation code
-            create_inverse_triples=False,
-            load_triples_kwargs=self.load_triples_kwargs,
-        )
+        if self.validation_path is None:
+            self._validation = None
+        else:
+            self._validation = TriplesFactory.from_path(
+                path=self.validation_path,
+                entity_to_id=self._training.entity_to_id,  # share entity index with training
+                relation_to_id=self._training.relation_to_id,  # share relation index with training
+                # do not explicitly create inverse triples for testing; this is handled by the evaluation code
+                create_inverse_triples=False,
+                load_triples_kwargs=self.load_triples_kwargs,
+            )
 
     def __repr__(self) -> str:  # noqa: D105
         return (
diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py
index c81822b951..0daabad014 100644
--- a/src/pykeen/typing.py
+++ b/src/pykeen/typing.py
@@ -44,9 +44,13 @@ def cast_constrainer(f) -> Constrainer:
     return cast(Constrainer, f)
 
 
+#: A hint for a :class:`torch.device`
 DeviceHint = Union[None, str, torch.device]
+#: A hint for a :class:`torch.Generator`
 TorchRandomHint = Union[None, int, torch.Generator]
 
+#: A type variable for head representations used in :class:`pykeen.models.Model`,
+#: :class:`pykeen.nn.modules.Interaction`, etc.
 HeadRepresentation = TypeVar("HeadRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]])
 RelationRepresentation = TypeVar("RelationRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]])
 TailRepresentation = TypeVar("TailRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]])

From 24c63665ee1f0e65c40a8560269537a6cd0ae6fd Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 14:43:45 +0100
Subject: [PATCH 02/18] Update more examples

---
 docs/source/tutorial/byod.rst | 28 +++++++++++++---------------
 src/pykeen/pipeline.py        |  8 +++++++-
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst
index 1d03387e47..8144e4624d 100644
--- a/docs/source/tutorial/byod.rst
+++ b/docs/source/tutorial/byod.rst
@@ -1,7 +1,9 @@
 Bring Your Own Data
 ===================
 As an alternative to using a pre-packaged dataset, the training and testing can be set explicitly
-by file path or with instances of :class:`pykeen.triples.TriplesFactory`.
+by file path or with instances of :class:`pykeen.triples.TriplesFactory`. Throughout this
+tutorial, the paths to the training, testing, and validation sets of the built-in
+:class:`pykeen.datasets.Nations` dataset are used as examples.
 
 Pre-stratified Dataset
 ----------------------
@@ -27,19 +29,16 @@ share the same mapping.
 This is equally applicable for the :func:`pykeen.hpo.hpo_pipeline`, which has a similar interface to
 the :func:`pykeen.pipeline.pipeline` as in:
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.hpo import hpo_pipeline
-    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
-
-    result = hpo_pipeline(
-        n_trials=30,
-        training=NATIONS_TRAIN_PATH,
-        testing=NATIONS_TEST_PATH,
-        model='TransE',
-    )
-    result.save_to_directory('test_hpo_pre_stratified_transe')
+>>> import pystow
+>>> from pykeen.hpo import hpo_pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> result = hpo_pipeline(
+...     n_trials=3,  # you probably want more than this
+...     training=NATIONS_TRAIN_PATH,
+...     testing=NATIONS_TEST_PATH,
+...     model='TransE',
+... )
+>>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_hpo_pre_stratified_transe'))
 
 The remainder of the examples will be for :func:`pykeen.pipeline.pipeline`, but all work exactly the same
 for :func:`pykeen.hpo.hpo_pipeline`.
@@ -49,7 +48,6 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse
 
 .. code-block:: python
 
-    from pykeen.triples import TriplesFactory
     from pykeen.pipeline import pipeline
     from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
 
diff --git a/src/pykeen/pipeline.py b/src/pykeen/pipeline.py
index f5375ac95f..eab5371faf 100644
--- a/src/pykeen/pipeline.py
+++ b/src/pykeen/pipeline.py
@@ -174,6 +174,7 @@
 import pickle
 import time
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Type, Union
 
 import pandas as pd
@@ -423,7 +424,12 @@ def _get_results(self) -> Mapping[str, Any]:
             results['stopper'] = self.stopper.get_summary_dict()
         return results
 
-    def save_to_directory(self, directory: str, save_metadata: bool = True, save_replicates: bool = True) -> None:
+    def save_to_directory(
+        self,
+        directory: Union[str, Path],
+        save_metadata: bool = True,
+        save_replicates: bool = True,
+    ) -> None:
         """Save all artifacts in the given directory."""
         os.makedirs(directory, exist_ok=True)
 

From ae593c4e7486bafadc83c6e3139d95180619f6f4 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 14:55:54 +0100
Subject: [PATCH 03/18] Add doctests

---
 tox.ini | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tox.ini b/tox.ini
index 84b4400ad0..3cb2d0cef4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -17,6 +17,7 @@ envlist =
     doc8
     docs
     # the actual tests
+    doctests
     py
     integration
     # always keep coverage-report last
@@ -49,6 +50,19 @@ deps =
 extras =
     mlflow
 
+[testenv:doctests]
+commands =
+    # TODO make this automatic for all RST in a loop (but not using xargs since doctest uses multiprocessing)
+    python -m doctest docs/source/tutorial/byod.rst
+    #python -m doctest docs/source/tutorial/checkpoints.rst
+    #python -m doctest docs/source/tutorial/first_steps.rst
+    #python -m doctest docs/source/tutorial/making_predictions.rst
+    #python -m doctest docs/source/tutorial/performance.rst
+    #python -m doctest docs/source/tutorial/running_ablation.rst
+    #python -m doctest docs/source/tutorial/running_hpo.rst
+    #python -m doctest docs/source/tutorial/translational_toy_example.rst
+    #python -m doctest docs/source/tutorial/understanding_evaluation.rst
+
 [testenv:coverage-clean]
 deps = coverage
 skip_install = true

From 23175c11b67ff47d67a1d654bc3f1fe535c9ab75 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 15:14:34 +0100
Subject: [PATCH 04/18] Update doctests

---
 .gitignore                    |   1 +
 docs/source/tutorial/byod.rst | 169 ++++++++++++++++------------------
 2 files changed, 80 insertions(+), 90 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7bf7dfe10a..a30a2480d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,3 +117,4 @@ docs/source/api/*
 scratch/*
 wandb/*
 mlruns
+doctests/
diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst
index 8144e4624d..71bb07fe5b 100644
--- a/docs/source/tutorial/byod.rst
+++ b/docs/source/tutorial/byod.rst
@@ -11,7 +11,6 @@ You've got a training and testing file as 3-column TSV files, all ready to go. Y
 any entities or relations appearing in the testing set that don't appear in the training set. Load them in the
 pipeline like this:
 
->>> import pystow
 >>> from pykeen.triples import TriplesFactory
 >>> from pykeen.pipeline import pipeline
 >>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
@@ -19,8 +18,9 @@ pipeline like this:
 ...     training=NATIONS_TRAIN_PATH,
 ...     testing=NATIONS_TEST_PATH,
 ...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
 ... )
->>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_pre_stratified_transe'))
+>>> result.save_to_directory('doctests/test_pre_stratified_transe')
 
 PyKEEN will take care of making sure that the entities are mapped from their labels to appropriate integer
 (technically, 0-dimensional :class:`torch.LongTensor`) indexes and that the different sets of triples
@@ -29,16 +29,17 @@ share the same mapping.
 This is equally applicable for the :func:`pykeen.hpo.hpo_pipeline`, which has a similar interface to
 the :func:`pykeen.pipeline.pipeline` as in:
 
->>> import pystow
 >>> from pykeen.hpo import hpo_pipeline
->>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH, NATIONS_VALIDATE_PATH
 >>> result = hpo_pipeline(
 ...     n_trials=3,  # you probably want more than this
 ...     training=NATIONS_TRAIN_PATH,
 ...     testing=NATIONS_TEST_PATH,
+...     validation=NATIONS_VALIDATE_PATH,
 ...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
 ... )
->>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_hpo_pre_stratified_transe'))
+>>> result.save_to_directory('doctests/test_hpo_pre_stratified_transe')
 
 The remainder of the examples will be for :func:`pykeen.pipeline.pipeline`, but all work exactly the same
 for :func:`pykeen.hpo.hpo_pipeline`.
@@ -46,41 +47,36 @@ for :func:`pykeen.hpo.hpo_pipeline`.
 If you want to add dataset-wide arguments, you can use the ``dataset_kwargs`` argument
 to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse_triples=True``.
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
-
-    result = pipeline(
-        training=NATIONS_TRAIN_PATH,
-        testing=NATIONS_TEST_PATH,
-        dataset_kwargs={'create_inverse_triples': True},
-        model='TransE',
-    )
-    result.save_to_directory('test_pre_stratified_transe')
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> result = pipeline(
+...     training=NATIONS_TRAIN_PATH,
+...     testing=NATIONS_TEST_PATH,
+...     dataset_kwargs={'create_inverse_triples': True},
+...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
+... )
+>>> result.save_to_directory('doctests/test_pre_stratified_transe')
 
 If you want finer control over how the triples are created, for example, if they are not all coming from
 TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface.
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.pipeline import pipeline
-    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
-
-    training = TriplesFactory(path=NATIONS_TRAIN_PATH)
-    testing = TriplesFactory(
-        path=NATIONS_TEST_PATH,
-        entity_to_id=training.entity_to_id,
-        relation_to_id=training.relation_to_id,
-    )
-
-    result = pipeline(
-        training_triples_factory=training,
-        testing_triples_factory=testing,
-        model='TransE',
-    )
-    pipeline_result.save_to_directory('test_pre_stratified_transe')
+>>> from pykeen.triples import TriplesFactory
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> training = TriplesFactory.from_path(NATIONS_TRAIN_PATH)
+>>> testing = TriplesFactory.from_path(
+...     NATIONS_TEST_PATH,
+...     entity_to_id=training.entity_to_id,
+...     relation_to_id=training.relation_to_id,
+... )
+>>> result = pipeline(
+...     training=training,
+...     testing=testing,
+...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
+... )
+>>> result.save_to_directory('doctests/test_pre_stratified_transe')
 
 .. warning::
 
@@ -94,29 +90,26 @@ The ``dataset_kwargs`` argument is ignored when passing your own :class:`pykeen.
 sure to include the ``create_inverse_triples=True`` in the instantiation of those classes if that's your
 desired behavior as in:
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.pipeline import pipeline
-    from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
-
-    training = TriplesFactory(
-        path=NATIONS_TRAIN_PATH,
-        create_inverse_triples=True,
-    )
-    testing = TriplesFactory(
-        path=NATIONS_TEST_PATH,
-        entity_to_id=training.entity_to_id,
-        relation_to_id=training.relation_to_id,
-        create_inverse_triples=True,
-    )
-
-    result = pipeline(
-        training_triples_factory=training,
-        testing_triples_factory=testing,
-        model='TransE',
-    )
-    result.save_to_directory('test_pre_stratified_transe')
+>>> from pykeen.triples import TriplesFactory
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
+>>> training = TriplesFactory.from_path(
+...     NATIONS_TRAIN_PATH,
+...     create_inverse_triples=True,
+... )
+>>> testing = TriplesFactory.from_path(
+...     NATIONS_TEST_PATH,
+...     entity_to_id=training.entity_to_id,
+...     relation_to_id=training.relation_to_id,
+...     create_inverse_triples=True,
+... )
+>>> result = pipeline(
+...     training=training,
+...     testing=testing,
+...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
+... )
+>>> result.save_to_directory('doctests/test_pre_stratified_transe')
 
 Triples factories can also be instantiated using the ``triples`` keyword argument instead of the ``path`` argument
 if you already have triples loaded in a :class:`numpy.ndarray`.
@@ -127,38 +120,34 @@ It's more realistic your real-world dataset is not already stratified into train
 PyKEEN has you covered with :func:`pykeen.triples.TriplesFactory.split`, which will allow you to create
 a stratified dataset.
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.pipeline import pipeline
-    from pykeen.datasets.nations import NATIONS_TRAIN_PATH
-
-    tf = TriplesFactory(path=NATIONS_TRAIN_PATH)
-    training, testing = tf.split()
-
-    result = pipeline(
-        training_triples_factory=training,
-        testing_triples_factory=testing,
-        model='TransE',
-    )
-    pipeline_result.save_to_directory('test_unstratified_transe')
+>>> from pykeen.triples import TriplesFactory
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH
+>>> tf = TriplesFactory.from_path(NATIONS_TRAIN_PATH)
+>>> training, testing = tf.split()
+>>> result = pipeline(
+...     training=training,
+...     testing=testing,
+...     model='TransE',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go higher
+... )
+>>> result.save_to_directory('doctests/test_unstratified_transe')
 
 By default, this is an 80/20 split. If you want to use early stopping, you'll also need a validation set, so
 you should specify the splits:
 
-.. code-block:: python
-
-    from pykeen.triples import TriplesFactory
-    from pykeen.pipeline import pipeline
-
-    tf = TriplesFactory(path=...)
-    training, testing, validation = tf.split([.8, .1, .1])
-
-    result = pipeline(
-        training_triples_factory=training,
-        testing_triples_factory=testing,
-        validation_triples_factory=validation,
-        model='TransE',
-        stopper='early',
-    )
-    pipeline_result.save_to_directory('test_unstratified_stopped_transe')
+>>> from pykeen.triples import TriplesFactory
+>>> from pykeen.pipeline import pipeline
+>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH
+>>> tf = TriplesFactory.from_path(NATIONS_TRAIN_PATH)
+>>> training, testing, validation = tf.split([.8, .1, .1])
+>>> result = pipeline(
+...     training=training,
+...     testing=testing,
+...     validation=validation,
+...     model='TransE',
+...     stopper='early',
+...     training_kwargs=dict(num_epochs=5),  # few epochs for testing - you should go
+...                                          # higher, especially with early stopping enabled
+... )
+>>> result.save_to_directory('doctests/test_unstratified_stopped_transe')

From 8fea70f048feb4959dfd518e9cec1a784c153b8b Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 16:59:12 +0100
Subject: [PATCH 05/18] Add doctests to GHA

Trigger CI
---
 .github/workflows/tests.yml        | 2 +-
 .github/workflows/tests_master.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 90ce0b5fbf..c67f555c44 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -84,7 +84,7 @@ jobs:
       - name: Run fast tests
         run: tox -e py
       - name: Run slow tests
-        run: tox -e integration
+        run: tox -e integration,doctests
   windows:
     if: "contains(github.event.head_commit.message, 'Trigger CI')"
     name: Windows
diff --git a/.github/workflows/tests_master.yml b/.github/workflows/tests_master.yml
index eb11b184bb..28a5828a17 100644
--- a/.github/workflows/tests_master.yml
+++ b/.github/workflows/tests_master.yml
@@ -83,7 +83,7 @@ jobs:
       - name: Run fast tests
         run: tox -e py
       - name: Run slow tests
-        run: tox -e integration
+        run: tox -e integration,doctests
   windows:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
     name: Windows

From 237ff6dffa355ea7af447f88df1135628f2d2543 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:00:18 +0100
Subject: [PATCH 06/18] Update tutorials

---
 docs/source/tutorial/checkpoints.rst        | 223 +++++++++-----------
 docs/source/tutorial/making_predictions.rst |  55 ++---
 tox.ini                                     |  12 +-
 3 files changed, 124 insertions(+), 166 deletions(-)

diff --git a/docs/source/tutorial/checkpoints.rst b/docs/source/tutorial/checkpoints.rst
index 157e0bfde7..3d30eb940d 100644
--- a/docs/source/tutorial/checkpoints.rst
+++ b/docs/source/tutorial/checkpoints.rst
@@ -17,55 +17,46 @@ Regular Checkpoints
 The tutorial :ref:`first_steps` showed how the :func:`pykeen.pipeline.pipeline` function can be used to set up an entire
 KGEM for training and evaluation in just two lines of code. A slightly extended example is shown below:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=1000,
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=1000,
+...     ),
+... )
 
 To enable checkpoints, all you have to do is add a ``checkpoint_name`` argument to the ``training_kwargs``.
 This argument should have the name you would like the checkpoint files saved on your computer to be called.
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=1000,
-            checkpoint_name='my_checkpoint.pt',
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=1000,
+...         checkpoint_name='my_checkpoint.pt',
+...     ),
+... )
 
 Furthermore, you can set the checkpoint frequency, i.e. how often checkpoints should be saved given in minutes, by
 setting the argument ``checkpoint_frequency`` with an integer. The default frequency is 30 minutes and setting it to
 ``0`` will cause the training loop to save a checkpoint after each epoch.
 Let's look at an example.
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=1000,
-            checkpoint_name='my_checkpoint.pt',
-            checkpoint_frequency=5,
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=1000,
+...         checkpoint_name='my_checkpoint.pt',
+...         checkpoint_frequency=5,
+...     ),
+... )
 
 Here we have defined a pipeline that will save training loop checkpoints in the checkpoint file called
 ``my_checkpoint.pt`` every time an epoch finishes and at least `5` minutes have passed since saving previously.
@@ -78,20 +69,17 @@ or the early stopper stops it. Assuming that you successfully trained the KGEM a
 that you would like to test the model with `2000` epochs, all you have to do is to change the number of epochs and
 execute the code like:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=2000,  # more epochs than before
-            checkpoint_name='my_checkpoint.pt',
-            checkpoint_frequency=5,
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=2000,  # more epochs than before
+...         checkpoint_name='my_checkpoint.pt',
+...         checkpoint_frequency=5,
+...     ),
+... )
 
 The above code will load the saved state after finishing `1000` epochs and continue to train to `2000` epochs, giving
 the exact same results as if you would have run it for `2000` epochs in the first place.
@@ -101,20 +89,17 @@ which is a subdirectory in your home directory, e.g. ``~/.data/pykeen/checkpoint
 Optionally, you can set the path to where you want the checkpoints to be saved by setting the ``checkpoint_directory``
 argument with a string or a :class:`pathlib.Path` object containing your desired root path, as shown in this example:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=2000,
-            checkpoint_name='my_checkpoint.pt',
-            checkpoint_directory='/my/secret/dir',
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=2000,
+...         checkpoint_name='my_checkpoint.pt',
+...         checkpoint_directory='doctests/checkpoint_dir',
+...     ),
+... )
 
 .. _failure_checkpoints_how_to:
 
@@ -123,16 +108,16 @@ Checkpoints on Failure
 In cases where you only would like to save checkpoints whenever the training loop might fail, you can use the argument
 ``checkpoint_on_failure=True``, like:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(num_epochs=2000, checkpoint_on_failure=True),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=2000,
+...         checkpoint_on_failure=True,
+...     ),
+... )
 
 This option differs from regular checkpoints, since regular checkpoints are only saved
 after a successful epoch. When saving checkpoints due to failure of the training loop there is no guarantee that all
@@ -141,19 +126,17 @@ specific training loop. Therefore, these checkpoints are saved with a distinct c
 ``PyKEEN_just_saved_my_day_{datetime}.pt`` in the given ``checkpoint_directory``, even when you also opted to use
 regular checkpoints as defined above, e.g. with this code:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-    pipeline_result = pipeline(
-        dataset='Nations',
-        model='TransE',
-        optimizer='Adam',
-        training_kwargs=dict(
-            num_epochs=2000,
-            checkpoint_name='my_checkpoint.pt',
-            checkpoint_on_failure=True,
-        ),
-    )
+>>> from pykeen.pipeline import pipeline
+>>> pipeline_result = pipeline(
+...     dataset='Nations',
+...     model='TransE',
+...     optimizer='Adam',
+...     training_kwargs=dict(
+...         num_epochs=2000,
+...         checkpoint_name='my_checkpoint.pt',
+...         checkpoint_on_failure=True,
+...     ),
+... )
 
 Note: Use this argument with caution, since every failed training loop will create a distinct checkpoint file.
 
@@ -195,19 +178,17 @@ To show how to use the checkpoint functionality without the pipeline, we define
 
 .. code-block:: python
 
-    from pykeen.models import TransE
-    from pykeen.training import SLCWATrainingLoop
-    from pykeen.triples import TriplesFactory
-    from torch.optim import Adam
-
-    triples_factory = Nations().training
-    model = TransE(
-        triples_factory=triples_factory,
-        random_seed=123,
-    )
-
-    optimizer = Adam(params=model.get_grad_params())
-    training_loop = SLCWATrainingLoop(model=model, optimizer=optimizer)
+>>> from pykeen.models import TransE
+>>> from pykeen.training import SLCWATrainingLoop
+>>> from pykeen.triples import TriplesFactory
+>>> from torch.optim import Adam
+>>> triples_factory = Nations().training
+>>> model = TransE(
+...     triples_factory=triples_factory,
+...     random_seed=123,
+... )
+>>> optimizer = Adam(params=model.get_grad_params())
+>>> training_loop = SLCWATrainingLoop(model=model, optimizer=optimizer)
 
 At this point we have a model, dataset and optimizer all setup in a training loop and are ready to train the model with
 the ``training_loop``'s method :func:`pykeen.training.TrainingLoop.train`. To enable checkpoints all you have to do is
@@ -222,13 +203,11 @@ argument with a string or a :class:`pathlib.Path` object containing your desired
 
 Here is an example:
 
-.. code-block:: python
-
-    losses = training_loop.train(
-        num_epochs=1000,
-        checkpoint_name='my_checkpoint.pt',
-        checkpoint_frequency=5,
-    )
+>>> losses = training_loop.train(
+...     num_epochs=1000,
+...     checkpoint_name='my_checkpoint.pt',
+...     checkpoint_frequency=5,
+... )
 
 With this code we have started the training loop with the above defined KGEM. The training loop will save a checkpoint
 in the ``my_checkpoint.pt`` file, which will be saved in the ``~/.data/pykeen/checkpoints/`` directory, since we haven't
@@ -249,26 +228,22 @@ E.g. the above training loop finished successfully after 1000 epochs, but you wo
 train the same model from that state for 2000 epochs. All you have have to do is to change the argument
 ``num_epochs`` in the above code to:
 
-.. code-block:: python
-
-    losses = training_loop.train(
-        num_epochs=2000,
-        checkpoint_name='my_checkpoint.pt',
-        checkpoint_frequency=5,
-    )
+>>> losses = training_loop.train(
+...     num_epochs=2000,
+...     checkpoint_name='my_checkpoint.pt',
+...     checkpoint_frequency=5,
+... )
 
 and now the training loop will resume from the state at 1000 epochs and continue to train until 2000 epochs.
 
 As shown in :ref:`failure_checkpoints_how_to`, you can also save checkpoints only in cases where the
 training loop fails. To do this you just have to set the argument `checkpoint_on_failure=True`, like:
 
-.. code-block:: python
-
-    losses = training_loop.train(
-        num_epochs=2000,
-        checkpoint_directory='/my/secret/dir',
-        checkpoint_on_failure=True,
-    )
+>>> losses = training_loop.train(
+...     num_epochs=2000,
+...     checkpoint_directory='/my/secret/dir',
+...     checkpoint_on_failure=True,
+... )
 
 This code will save a checkpoint in case the training loop fails. Note how we also chose a new checkpoint directory by
 setting the `checkpoint_directory` argument to ``/my/secret/dir``.
diff --git a/docs/source/tutorial/making_predictions.rst b/docs/source/tutorial/making_predictions.rst
index 3e10337107..5744984674 100644
--- a/docs/source/tutorial/making_predictions.rst
+++ b/docs/source/tutorial/making_predictions.rst
@@ -26,30 +26,22 @@ This example shows using the :func:`pykeen.pipeline.pipeline` to train a model
 which will already be in memory. Each of the high-level interfaces are exposed through the
 model:
 
-.. code-block:: python
-
-    from pykeen.pipeline import pipeline
-
-    pipeline_result = pipeline(dataset='Nations', model='RotatE')
-    model = pipeline_result.model
-
-    # Predict tails
-    predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs')
-
-    # Predict relations
-    predicted_relations_df = model.get_relation_prediction_df('brazil', 'uk')
-
-    # Predict heads
-    predicted_heads_df = model.get_head_prediction_df('conferences', 'brazil')
-
-    # Score all triples (memory intensive)
-    predictions_df = model.get_all_prediction_df()
-
-    # Score top K triples
-    predictions_df = model.get_all_prediction_df(k=150)
-
-    # save the model
-    pipeline_result.save_to_directory('nations_rotate')
+>>> from pykeen.pipeline import pipeline
+>>> # Run the pipeline
+>>> pipeline_result = pipeline(dataset='Nations', model='RotatE')
+>>> model = pipeline_result.model
+>>> # Predict tails
+>>> predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs')
+>>> # Predict relations
+>>> predicted_relations_df = model.get_relation_prediction_df('brazil', 'uk')
+>>> # Predict heads
+>>> predicted_heads_df = model.get_head_prediction_df('conferences', 'brazil')
+>>> # Score all triples (memory intensive)
+>>> predictions_df = model.get_all_prediction_df()
+>>> # Score top K triples
+>>> top_k_predictions_df = model.get_all_prediction_df(k=150)
+>>> # save the model
+>>> pipeline_result.save_to_directory('doctests/nations_rotate')
 
 Loading a Model
 ~~~~~~~~~~~~~~~
@@ -58,16 +50,11 @@ This example shows how to reload a previously trained model. The
 a file named ``trained_model.pkl``, so we will use the one from the
 previous example.
 
-.. code-block:: python
-
-    import torch
-
-    model = torch.load('nations_rotate/trained_model.pkl')
-
-    # Predict tails
-    predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs')
-
-    # everything else is the same as above
+>>> import torch
+>>> model = torch.load('doctests/nations_rotate/trained_model.pkl')
+>>> # Predict tails
+>>> predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs')
+>>> # everything else is the same as above
 
 There's an example model available at
 https://github.com/pykeen/pykeen/blob/master/notebooks/hello_world/nations_transe/trained_model.pkl
diff --git a/tox.ini b/tox.ini
index 3cb2d0cef4..a1b7b76d99 100644
--- a/tox.ini
+++ b/tox.ini
@@ -53,15 +53,11 @@ extras =
 [testenv:doctests]
 commands =
     # TODO make this automatic for all RST in a loop (but not using xargs since doctest uses multiprocessing)
+    python -m doctest docs/source/tutorial/first_steps.rst
     python -m doctest docs/source/tutorial/byod.rst
-    #python -m doctest docs/source/tutorial/checkpoints.rst
-    #python -m doctest docs/source/tutorial/first_steps.rst
-    #python -m doctest docs/source/tutorial/making_predictions.rst
-    #python -m doctest docs/source/tutorial/performance.rst
-    #python -m doctest docs/source/tutorial/running_ablation.rst
-    #python -m doctest docs/source/tutorial/running_hpo.rst
-    #python -m doctest docs/source/tutorial/translational_toy_example.rst
-    #python -m doctest docs/source/tutorial/understanding_evaluation.rst
+    python -m doctest docs/source/tutorial/making_predictions.rst
+    # python -m doctest src/pykeen/pipeline.py
+    # python -m doctest src/pykeen/hpo/__init__.py
 
 [testenv:coverage-clean]
 deps = coverage

From 7846d6927423bfae09841805a7eadbfe7d384646 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:05:54 +0100
Subject: [PATCH 07/18] Pass mypy

---
 src/pykeen/datasets/base.py    | 4 ++--
 src/pykeen/datasets/dbpedia.py | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/pykeen/datasets/base.py b/src/pykeen/datasets/base.py
index 61080fd93c..ec368d09ed 100644
--- a/src/pykeen/datasets/base.py
+++ b/src/pykeen/datasets/base.py
@@ -177,7 +177,7 @@ def testing(self) -> TriplesFactory:  # type:ignore # noqa: D401
         return self._testing
 
     @property
-    def validation(self) -> TriplesFactory:  # type:ignore # noqa: D401
+    def validation(self) -> Optional[TriplesFactory]:  # type:ignore # noqa: D401
         """The validation triples factory that shares indices with the training triples factory."""
         if not self._loaded:
             self._load()
@@ -223,7 +223,7 @@ def __init__(
         self,
         training_path: Union[str, TextIO],
         testing_path: Union[str, TextIO],
-        validation_path: Union[str, TextIO],
+        validation_path: Union[None, str, TextIO],
         eager: bool = False,
         create_inverse_triples: bool = False,
         load_triples_kwargs: Optional[Mapping[str, Any]] = None,
diff --git a/src/pykeen/datasets/dbpedia.py b/src/pykeen/datasets/dbpedia.py
index d508d9154f..404620519d 100644
--- a/src/pykeen/datasets/dbpedia.py
+++ b/src/pykeen/datasets/dbpedia.py
@@ -45,8 +45,4 @@ def __init__(self, create_inverse_triples: bool = False, **kwargs):
 
 
 if __name__ == '__main__':
-    _d = DBpedia50()
-    _d.summarize()
-    print(_d.training.triples[:5])
-    print(_d.testing.triples[:5])
-    print(_d.validation.triples[:5])
+    DBpedia50().summarize()

From 76c1b4d0e960c1266244c3b738bca461b0f08578 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:09:32 +0100
Subject: [PATCH 08/18] Update README acknowledgement

---
 README.md                      | 2 +-
 src/pykeen/templates/README.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6697258ba8..c58fd6fc27 100644
--- a/README.md
+++ b/README.md
@@ -300,7 +300,7 @@ See [CONTRIBUTING.md](/CONTRIBUTING.md) for more information on getting involved
 This project has been supported by several organizations (in alphabetical order):
 
 - [Bayer](https://www.bayer.com/)
-- [Enveda Therapeutics](https://envedatherapeutics.com/)
+- [Enveda Biosciences](https://www.envedabio.com/)
 - [Fraunhofer Institute for Algorithms and Scientific Computing](https://www.scai.fraunhofer.de)
 - [Fraunhofer Institute for Intelligent Analysis and Information Systems](https://www.iais.fraunhofer.de)
 - [Fraunhofer Center for Machine Learning](https://www.cit.fraunhofer.de/de/zentren/maschinelles-lernen.html)
diff --git a/src/pykeen/templates/README.md b/src/pykeen/templates/README.md
index 42fdd76b4e..155b0820eb 100644
--- a/src/pykeen/templates/README.md
+++ b/src/pykeen/templates/README.md
@@ -202,7 +202,7 @@ See [CONTRIBUTING.md](/CONTRIBUTING.md) for more information on getting involved
 This project has been supported by several organizations (in alphabetical order):
 
 - [Bayer](https://www.bayer.com/)
-- [Enveda Therapeutics](https://envedatherapeutics.com/)
+- [Enveda Biosciences](https://www.envedabio.com/)
 - [Fraunhofer Institute for Algorithms and Scientific Computing](https://www.scai.fraunhofer.de)
 - [Fraunhofer Institute for Intelligent Analysis and Information Systems](https://www.iais.fraunhofer.de)
 - [Fraunhofer Center for Machine Learning](https://www.cit.fraunhofer.de/de/zentren/maschinelles-lernen.html)

From 2d616872634d4223b22a30e986bd3a8fdc858006 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:09:37 +0100
Subject: [PATCH 09/18] Update license year

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 81380cad04..f48adb8aad 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2019-2020 PyKEEN Project Team
+Copyright (c) 2019-2021 PyKEEN Project Team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

From cd65862ad1377e728de210dbdf51d1a099fd7c31 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:09:48 +0100
Subject: [PATCH 10/18] Update AUTHORS.md

Add link to GitHub authors

Trigger CI
---
 AUTHORS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/AUTHORS.md b/AUTHORS.md
index e8a1ae4295..b846ecf502 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -16,3 +16,5 @@
 - [Michael Galkin](https://github.com/migalkin)
 - [Felix Hamann](https://github.com/kantholtz)
 - [Sankranti Joshi](https://github.com/sunny1401)
+
+See also: https://github.com/pykeen/pykeen/graphs/contributors

From 1d28264ec37ba5e6fb260391506549c4cbb053ac Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:18:17 +0100
Subject: [PATCH 11/18] Update typing.py

Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com>
---
 src/pykeen/typing.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py
index 0f06dd7cc0..d205003a16 100644
--- a/src/pykeen/typing.py
+++ b/src/pykeen/typing.py
@@ -17,6 +17,7 @@
     'MappedTriples',
     'EntityMapping',
     'RelationMapping',
+    # Tensor Functions
     'Initializer',
     'Normalizer',
     'Constrainer',
@@ -24,6 +25,8 @@
     'InteractionFunction',
     'DeviceHint',
     'TorchRandomHint',
+    # Tensors
+    'TensorType',
     'HeadRepresentation',
     'RelationRepresentation',
     'TailRepresentation',
@@ -34,6 +37,7 @@
 
 X = TypeVar('X')
 Hint = Union[None, str, X]
+#: A function that takes an object as input and returns an object of the same type as output
 Mutation = Callable[[X], X]
 OneOrSequence = Union[X, Sequence[X]]
 
@@ -42,12 +46,17 @@
 EntityMapping = Mapping[str, int]
 RelationMapping = Mapping[str, int]
 
-# comment: TypeVar expects none, or at least two super-classes
+#: A type variable bound by :class:`torch.Tensor` or :class:`torch.FloatTensor`.
 TensorType = TypeVar("TensorType", torch.Tensor, torch.FloatTensor)
+#: An interaction function takes in head tensor(s), relation tensor(s), and tail tensor(s)
+#: then returns a score
 InteractionFunction = Callable[[TensorType, TensorType, TensorType], TensorType]
 
+#: A function that can be applied to a tensor to initialize it
 Initializer = Mutation[TensorType]
+#: A function that can be applied to a tensor to normalize it
 Normalizer = Mutation[TensorType]
+#: A function that can be applied to a tensor to constrain it
 Constrainer = Mutation[TensorType]
 
 
@@ -64,7 +73,11 @@ def cast_constrainer(f) -> Constrainer:
 #: A type variable for head representations used in :class:`pykeen.models.Model`,
 #: :class:`pykeen.nn.modules.Interaction`, etc.
 HeadRepresentation = TypeVar("HeadRepresentation", bound=OneOrSequence[torch.FloatTensor])
+#: A type variable for relation representations used in :class:`pykeen.models.Model`,
+#: :class:`pykeen.nn.modules.Interaction`, etc.
 RelationRepresentation = TypeVar("RelationRepresentation", bound=OneOrSequence[torch.FloatTensor])
+#: A type variable for tail representations used in :class:`pykeen.models.Model`,
+#: :class:`pykeen.nn.modules.Interaction`, etc.
 TailRepresentation = TypeVar("TailRepresentation", bound=OneOrSequence[torch.FloatTensor])
 
 

From 9910025e18624276f973a6205455a8362baed533 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:25:51 +0100
Subject: [PATCH 12/18] Update typing.py

Trigger CI

Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com>
---
 src/pykeen/typing.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py
index d205003a16..d2d0b09159 100644
--- a/src/pykeen/typing.py
+++ b/src/pykeen/typing.py
@@ -12,21 +12,20 @@
     'Hint',
     'Mutation',
     'OneOrSequence',
-    # Others
+    # Triples
     'LabeledTriples',
     'MappedTriples',
     'EntityMapping',
     'RelationMapping',
+    # Others
+    'DeviceHint',
+    'TorchRandomHint',
     # Tensor Functions
     'Initializer',
     'Normalizer',
     'Constrainer',
     'cast_constrainer',
-    'InteractionFunction',
-    'DeviceHint',
-    'TorchRandomHint',
     # Tensors
-    'TensorType',
     'HeadRepresentation',
     'RelationRepresentation',
     'TailRepresentation',
@@ -46,18 +45,12 @@
 EntityMapping = Mapping[str, int]
 RelationMapping = Mapping[str, int]
 
-#: A type variable bound by :class:`torch.Tensor` or :class:`torch.FloatTensor`.
-TensorType = TypeVar("TensorType", torch.Tensor, torch.FloatTensor)
-#: An interaction function takes in head tensor(s), relation tensor(s), and tail tensor(s)
-#: then returns a score
-InteractionFunction = Callable[[TensorType, TensorType, TensorType], TensorType]
-
 #: A function that can be applied to a tensor to initialize it
-Initializer = Mutation[TensorType]
+Initializer = Mutation[torch.FloatTensor]
 #: A function that can be applied to a tensor to normalize it
-Normalizer = Mutation[TensorType]
+Normalizer = Mutation[torch.FloatTensor]
 #: A function that can be applied to a tensor to constrain it
-Constrainer = Mutation[TensorType]
+Constrainer = Mutation[torch.FloatTensor]
 
 
 def cast_constrainer(f) -> Constrainer:

From 15a635585e691dc054fd234c8c894bff5b3f998e Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 20:29:45 +0100
Subject: [PATCH 13/18] Update checkpoints.rst

Trigger CI

Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com>
---
 docs/source/tutorial/checkpoints.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/tutorial/checkpoints.rst b/docs/source/tutorial/checkpoints.rst
index 3d30eb940d..cfa550020e 100644
--- a/docs/source/tutorial/checkpoints.rst
+++ b/docs/source/tutorial/checkpoints.rst
@@ -176,8 +176,6 @@ the same compared to running uninterrupted without checkpoints, also for the eva
 
 To show how to use the checkpoint functionality without the pipeline, we define a KGEM first:
 
-.. code-block:: python
-
 >>> from pykeen.models import TransE
 >>> from pykeen.training import SLCWATrainingLoop
 >>> from pykeen.triples import TriplesFactory

From ca2eaff0a370998e172334a8812c204a1190df54 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 22:29:34 +0100
Subject: [PATCH 14/18] Split out doctests

Trigger CI
---
 .github/workflows/tests.yml        | 4 +++-
 .github/workflows/tests_master.yml | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c67f555c44..d6a864ded9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -84,7 +84,9 @@ jobs:
       - name: Run fast tests
         run: tox -e py
       - name: Run slow tests
-        run: tox -e integration,doctests
+        run: tox -e integration
+      - name: Run doctests
+        run: tox -e doctests
   windows:
     if: "contains(github.event.head_commit.message, 'Trigger CI')"
     name: Windows
diff --git a/.github/workflows/tests_master.yml b/.github/workflows/tests_master.yml
index 28a5828a17..9719fa7a33 100644
--- a/.github/workflows/tests_master.yml
+++ b/.github/workflows/tests_master.yml
@@ -83,7 +83,9 @@ jobs:
       - name: Run fast tests
         run: tox -e py
       - name: Run slow tests
-        run: tox -e integration,doctests
+        run: tox -e integration
+      - name: Run doctests
+        run: tox -e doctests
   windows:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
     name: Windows

From 13e8fa7c53baba9884ee97cf642727edc955b047 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 22:55:40 +0100
Subject: [PATCH 15/18] =?UTF-8?q?Bump=20version:=201.2.0-dev=20=E2=86=92?=
 =?UTF-8?q?=201.2.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 docs/source/conf.py   | 2 +-
 src/pykeen/version.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 5022316fa8..0df5d6139f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0-dev
+current_version = 1.2.0
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 39106fb3be..17e16ea185 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -52,7 +52,7 @@
 author = 'PyKEEN Project Team'
 
 # The full version, including alpha/beta/rc tags.
-release = '1.2.0-dev'
+release = '1.2.0'
 
 # The short X.Y version.
 parsed_version = re.match(
diff --git a/src/pykeen/version.py b/src/pykeen/version.py
index 0a27740ef5..09325feb11 100644
--- a/src/pykeen/version.py
+++ b/src/pykeen/version.py
@@ -11,7 +11,7 @@
     'get_git_hash',
 ]
 
-VERSION = '1.2.0-dev'
+VERSION = '1.2.0'
 
 
 def get_git_hash() -> str:

From 1963889b5997a14692c802967e676631d71fb256 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 22:56:14 +0100
Subject: [PATCH 16/18] =?UTF-8?q?Bump=20version:=201.2.0=20=E2=86=92=201.2?=
 =?UTF-8?q?.1-dev?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 docs/source/conf.py   | 2 +-
 src/pykeen/version.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0df5d6139f..c5f3a17993 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0
+current_version = 1.2.1-dev
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 17e16ea185..d3854dd035 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -52,7 +52,7 @@
 author = 'PyKEEN Project Team'
 
 # The full version, including alpha/beta/rc tags.
-release = '1.2.0'
+release = '1.2.1-dev'
 
 # The short X.Y version.
 parsed_version = re.match(
diff --git a/src/pykeen/version.py b/src/pykeen/version.py
index 09325feb11..068ce5a0a9 100644
--- a/src/pykeen/version.py
+++ b/src/pykeen/version.py
@@ -11,7 +11,7 @@
     'get_git_hash',
 ]
 
-VERSION = '1.2.0'
+VERSION = '1.2.1-dev'
 
 
 def get_git_hash() -> str:

From a9108f4b2b48b60509f3c58d5f98c2af15c62259 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 12 Feb 2021 23:16:02 +0100
Subject: [PATCH 17/18] Bump versions

Trigger CI

following the previous release kerfuffle...
---
 .bumpversion.cfg      | 2 +-
 docs/source/conf.py   | 2 +-
 src/pykeen/version.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index c5f3a17993..aa0ad94822 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.1-dev
+current_version = 1.3.0-dev
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/docs/source/conf.py b/docs/source/conf.py
index d3854dd035..0a7ca9f829 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -52,7 +52,7 @@
 author = 'PyKEEN Project Team'
 
 # The full version, including alpha/beta/rc tags.
-release = '1.2.1-dev'
+release = '1.3.0-dev'
 
 # The short X.Y version.
 parsed_version = re.match(
diff --git a/src/pykeen/version.py b/src/pykeen/version.py
index 068ce5a0a9..a6ffa3c798 100644
--- a/src/pykeen/version.py
+++ b/src/pykeen/version.py
@@ -11,7 +11,7 @@
     'get_git_hash',
 ]
 
-VERSION = '1.2.1-dev'
+VERSION = '1.3.0-dev'
 
 
 def get_git_hash() -> str:

From c9872498f246b6c0dd889283ada2ceb47e9e5e36 Mon Sep 17 00:00:00 2001
From: PyKEEN_bot <pykeen2019@gmail.com>
Date: Fri, 12 Feb 2021 22:22:27 +0000
Subject: [PATCH 18/18] Trigger CI