From 9ff2bf5cc4eb2c4f6a09cc0d7d69c58a3a1ebaf3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 14:36:14 +0100 Subject: [PATCH 01/18] Update docs --- docs/source/reference/constants.rst | 3 ++ docs/source/tutorial/byod.rst | 59 ++++++++++++----------------- src/pykeen/datasets/__init__.py | 9 ++++- src/pykeen/datasets/base.py | 20 +++++----- src/pykeen/typing.py | 4 ++ 5 files changed, 49 insertions(+), 46 deletions(-) diff --git a/docs/source/reference/constants.rst b/docs/source/reference/constants.rst index 678d6016ec..3ca0facdba 100644 --- a/docs/source/reference/constants.rst +++ b/docs/source/reference/constants.rst @@ -2,3 +2,6 @@ Constants ========= .. automodule:: pykeen.constants :members: + +.. automodule:: pykeen.typing + :members: diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst index 0947f8ab89..1d03387e47 100644 --- a/docs/source/tutorial/byod.rst +++ b/docs/source/tutorial/byod.rst @@ -9,20 +9,16 @@ You've got a training and testing file as 3-column TSV files, all ready to go. Y any entities or relations appearing in the testing set that don't appear in the training set. Load them in the pipeline like this: -.. code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.pipeline import pipeline - - training_path: str = ... - testing_path: str = ... - - result = pipeline( - training_triples_factory=training_path, - testing_triples_factory=testing_path, - model='TransE', - ) - result.save_to_directory('test_pre_stratified_transe') +>>> import pystow +>>> from pykeen.triples import TriplesFactory +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> result = pipeline( +... training=NATIONS_TRAIN_PATH, +... testing=NATIONS_TEST_PATH, +... model='TransE', +... 
) +>>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_pre_stratified_transe')) PyKEEN will take care of making sure that the entities are mapped from their labels to appropriate integer (technically, 0-dimensional :class:`torch.LongTensor`) indexes and that the different sets of triples @@ -35,14 +31,12 @@ the :func:`pykeen.pipeline.pipeline` as in: from pykeen.triples import TriplesFactory from pykeen.hpo import hpo_pipeline - - training_path: str = ... - testing_path: str = ... + from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH result = hpo_pipeline( n_trials=30, - training_triples_factory=training_path, - testing_triples_factory=testing_path, + training=NATIONS_TRAIN_PATH, + testing=NATIONS_TEST_PATH, model='TransE', ) result.save_to_directory('test_hpo_pre_stratified_transe') @@ -57,13 +51,11 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse from pykeen.triples import TriplesFactory from pykeen.pipeline import pipeline - - training_path: str = ... - testing_path: str = ... + from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH result = pipeline( - training_triples_factory=training_path, - testing_triples_factory=testing_path, + training=NATIONS_TRAIN_PATH, + testing=NATIONS_TEST_PATH, dataset_kwargs={'create_inverse_triples': True}, model='TransE', ) @@ -76,13 +68,11 @@ TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface. from pykeen.triples import TriplesFactory from pykeen.pipeline import pipeline + from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH - training_path: str = ... - testing_path: str = ... 
- - training = TriplesFactory(path=training_path) + training = TriplesFactory(path=NATIONS_TRAIN_PATH) testing = TriplesFactory( - path=testing_path, + path=NATIONS_TEST_PATH, entity_to_id=training.entity_to_id, relation_to_id=training.relation_to_id, ) @@ -110,16 +100,14 @@ desired behavior as in: from pykeen.triples import TriplesFactory from pykeen.pipeline import pipeline - - training_path: str = ... - testing_path: str = ... + from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH training = TriplesFactory( - path=training_path, + path=NATIONS_TRAIN_PATH, create_inverse_triples=True, ) testing = TriplesFactory( - path=testing_path, + path=NATIONS_TEST_PATH, entity_to_id=training.entity_to_id, relation_to_id=training.relation_to_id, create_inverse_triples=True, @@ -145,8 +133,9 @@ a stratified dataset. from pykeen.triples import TriplesFactory from pykeen.pipeline import pipeline + from pykeen.datasets.nations import NATIONS_TRAIN_PATH - tf = TriplesFactory(path=...) + tf = TriplesFactory(path=NATIONS_TRAIN_PATH) training, testing = tf.split() result = pipeline( diff --git a/src/pykeen/datasets/__init__.py b/src/pykeen/datasets/__init__.py index 842374448b..5019000c68 100644 --- a/src/pykeen/datasets/__init__.py +++ b/src/pykeen/datasets/__init__.py @@ -125,7 +125,7 @@ def get_dataset( raise TypeError(f'Dataset is invalid type: {type(dataset)}') if isinstance(training, str) and isinstance(testing, str): - if isinstance(validation, str): + if validation is None or isinstance(validation, str): return PathDataset( training_path=training, testing_path=testing, @@ -146,7 +146,12 @@ def get_dataset( validation=validation, ) - raise TypeError('Training and testing must both be given as strings or Triples Factories') + raise TypeError( + f'''Training and testing must both be given as strings or Triples Factories. 
+ - Training: {type(training)}: {training} + - Testing: {type(testing)}: {testing} + ''', + ) def has_dataset(key: str) -> bool: diff --git a/src/pykeen/datasets/base.py b/src/pykeen/datasets/base.py index d71cdaa9c1..61080fd93c 100644 --- a/src/pykeen/datasets/base.py +++ b/src/pykeen/datasets/base.py @@ -183,7 +183,6 @@ def validation(self) -> TriplesFactory: # type:ignore # noqa: D401 self._load() if not self._loaded_validation: self._load_validation() - assert self._validation is not None return self._validation @property @@ -269,14 +268,17 @@ def _load_validation(self) -> None: # don't call this function by itself. assumes called through the `validation` # property and the _training factory has already been loaded assert self._training is not None - self._validation = TriplesFactory.from_path( - path=self.validation_path, - entity_to_id=self._training.entity_to_id, # share entity index with training - relation_to_id=self._training.relation_to_id, # share relation index with training - # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=False, - load_triples_kwargs=self.load_triples_kwargs, - ) + if self.validation_path is None: + self._validation = None + else: + self._validation = TriplesFactory.from_path( + path=self.validation_path, + entity_to_id=self._training.entity_to_id, # share entity index with training + relation_to_id=self._training.relation_to_id, # share relation index with training + # do not explicitly create inverse triples for testing; this is handled by the evaluation code + create_inverse_triples=False, + load_triples_kwargs=self.load_triples_kwargs, + ) def __repr__(self) -> str: # noqa: D105 return ( diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py index c81822b951..0daabad014 100644 --- a/src/pykeen/typing.py +++ b/src/pykeen/typing.py @@ -44,9 +44,13 @@ def cast_constrainer(f) -> Constrainer: return cast(Constrainer, f) +#: A hint for a :class:`torch.device` 
DeviceHint = Union[None, str, torch.device] +#: A hint for a :class:`torch.Generator` TorchRandomHint = Union[None, int, torch.Generator] +#: A type variable for head representations used in :class:`pykeen.models.Model`, +#: :class:`pykeen.nn.modules.Interaction`, etc. HeadRepresentation = TypeVar("HeadRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]]) RelationRepresentation = TypeVar("RelationRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]]) TailRepresentation = TypeVar("TailRepresentation", bound=Union[torch.FloatTensor, Sequence[torch.FloatTensor]]) From 24c63665ee1f0e65c40a8560269537a6cd0ae6fd Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 14:43:45 +0100 Subject: [PATCH 02/18] Update more examples --- docs/source/tutorial/byod.rst | 28 +++++++++++++--------------- src/pykeen/pipeline.py | 8 +++++++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst index 1d03387e47..8144e4624d 100644 --- a/docs/source/tutorial/byod.rst +++ b/docs/source/tutorial/byod.rst @@ -1,7 +1,9 @@ Bring Your Own Data =================== As an alternative to using a pre-packaged dataset, the training and testing can be set explicitly -by file path or with instances of :class:`pykeen.triples.TriplesFactory`. +by file path or with instances of :class:`pykeen.triples.TriplesFactory`. Throughout this +tutorial, the paths to the training, testing, and validation sets for the built-in +:class:`pykeen.datasets.Nations` dataset will be used as examples. Pre-stratified Dataset ---------------------- @@ -27,19 +29,16 @@ share the same mapping. This is equally applicable for the :func:`pykeen.hpo.hpo_pipeline`, which has a similar interface to the :func:`pykeen.pipeline.pipeline` as in: -.. 
code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.hpo import hpo_pipeline - from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH - - result = hpo_pipeline( - n_trials=30, - training=NATIONS_TRAIN_PATH, - testing=NATIONS_TEST_PATH, - model='TransE', - ) - result.save_to_directory('test_hpo_pre_stratified_transe') +>>> import pystow +>>> from pykeen.hpo import hpo_pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> result = hpo_pipeline( +... n_trials=3, # you probably want more than this +... training=NATIONS_TRAIN_PATH, +... testing=NATIONS_TEST_PATH, +... model='TransE', +... ) +>>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_hpo_pre_stratified_transe')) The remainder of the examples will be for :func:`pykeen.pipeline.pipeline`, but all work exactly the same for :func:`pykeen.hpo.hpo_pipeline`. @@ -49,7 +48,6 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse .. 
code-block:: python - from pykeen.triples import TriplesFactory from pykeen.pipeline import pipeline from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH diff --git a/src/pykeen/pipeline.py b/src/pykeen/pipeline.py index f5375ac95f..eab5371faf 100644 --- a/src/pykeen/pipeline.py +++ b/src/pykeen/pipeline.py @@ -174,6 +174,7 @@ import pickle import time from dataclasses import dataclass, field +from pathlib import Path from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Type, Union import pandas as pd @@ -423,7 +424,12 @@ def _get_results(self) -> Mapping[str, Any]: results['stopper'] = self.stopper.get_summary_dict() return results - def save_to_directory(self, directory: str, save_metadata: bool = True, save_replicates: bool = True) -> None: + def save_to_directory( + self, + directory: Union[str, Path], + save_metadata: bool = True, + save_replicates: bool = True, + ) -> None: """Save all artifacts in the given directory.""" os.makedirs(directory, exist_ok=True) From ae593c4e7486bafadc83c6e3139d95180619f6f4 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 14:55:54 +0100 Subject: [PATCH 03/18] Add doctests --- tox.ini | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tox.ini b/tox.ini index 84b4400ad0..3cb2d0cef4 100644 --- a/tox.ini +++ b/tox.ini @@ -17,6 +17,7 @@ envlist = doc8 docs # the actual tests + doctests py integration # always keep coverage-report last @@ -49,6 +50,19 @@ deps = extras = mlflow +[testenv:doctests] +commands = + # TODO make this automatic for all RST in a loop (but not using xargs since doctest uses multiprocessing) + python -m doctest docs/source/tutorial/byod.rst + #python -m doctest docs/source/tutorial/checkpoints.rst + #python -m doctest docs/source/tutorial/first_steps.rst + #python -m doctest docs/source/tutorial/making_predictions.rst + #python -m doctest docs/source/tutorial/performance.rst + #python -m doctest 
docs/source/tutorial/running_ablation.rst + #python -m doctest docs/source/tutorial/running_hpo.rst + #python -m doctest docs/source/tutorial/translational_toy_example.rst + #python -m doctest docs/source/tutorial/understanding_evaluation.rst + [testenv:coverage-clean] deps = coverage skip_install = true From 23175c11b67ff47d67a1d654bc3f1fe535c9ab75 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 15:14:34 +0100 Subject: [PATCH 04/18] Update doctests --- .gitignore | 1 + docs/source/tutorial/byod.rst | 169 ++++++++++++++++------------------ 2 files changed, 80 insertions(+), 90 deletions(-) diff --git a/.gitignore b/.gitignore index 7bf7dfe10a..a30a2480d9 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ docs/source/api/* scratch/* wandb/* mlruns +doctests/ diff --git a/docs/source/tutorial/byod.rst b/docs/source/tutorial/byod.rst index 8144e4624d..71bb07fe5b 100644 --- a/docs/source/tutorial/byod.rst +++ b/docs/source/tutorial/byod.rst @@ -11,7 +11,6 @@ You've got a training and testing file as 3-column TSV files, all ready to go. Y any entities or relations appearing in the testing set that don't appear in the training set. Load them in the pipeline like this: ->>> import pystow >>> from pykeen.triples import TriplesFactory >>> from pykeen.pipeline import pipeline >>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH @@ -19,8 +18,9 @@ pipeline like this: ... training=NATIONS_TRAIN_PATH, ... testing=NATIONS_TEST_PATH, ... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher ... 
) ->>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_pre_stratified_transe')) +>>> result.save_to_directory('doctests/test_pre_stratified_transe') PyKEEN will take care of making sure that the entities are mapped from their labels to appropriate integer (technically, 0-dimensional :class:`torch.LongTensor`) indexes and that the different sets of triples @@ -29,16 +29,17 @@ share the same mapping. This is equally applicable for the :func:`pykeen.hpo.hpo_pipeline`, which has a similar interface to the :func:`pykeen.pipeline.pipeline` as in: ->>> import pystow >>> from pykeen.hpo import hpo_pipeline ->>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH, NATIONS_VALIDATE_PATH >>> result = hpo_pipeline( ... n_trials=3, # you probably want more than this ... training=NATIONS_TRAIN_PATH, ... testing=NATIONS_TEST_PATH, +... validation=NATIONS_VALIDATE_PATH, ... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher ... ) ->>> result.save_to_directory(pystow.get('pykeen', 'docs', 'test_hpo_pre_stratified_transe')) +>>> result.save_to_directory('doctests/test_hpo_pre_stratified_transe') The remainder of the examples will be for :func:`pykeen.pipeline.pipeline`, but all work exactly the same for :func:`pykeen.hpo.hpo_pipeline`. @@ -46,41 +47,36 @@ for :func:`pykeen.hpo.hpo_pipeline`. If you want to add dataset-wide arguments, you can use the ``dataset_kwargs`` argument to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse_triples=True``. -.. 
code-block:: python - - from pykeen.pipeline import pipeline - from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH - - result = pipeline( - training=NATIONS_TRAIN_PATH, - testing=NATIONS_TEST_PATH, - dataset_kwargs={'create_inverse_triples': True}, - model='TransE', - ) - result.save_to_directory('test_pre_stratified_transe') +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> result = pipeline( +... training=NATIONS_TRAIN_PATH, +... testing=NATIONS_TEST_PATH, +... dataset_kwargs={'create_inverse_triples': True}, +... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher +... ) +>>> result.save_to_directory('doctests/test_pre_stratified_transe') If you want finer control over how the triples are created, for example, if they are not all coming from TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface. -.. code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.pipeline import pipeline - from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH - - training = TriplesFactory(path=NATIONS_TRAIN_PATH) - testing = TriplesFactory( - path=NATIONS_TEST_PATH, - entity_to_id=training.entity_to_id, - relation_to_id=training.relation_to_id, - ) - - result = pipeline( - training_triples_factory=training, - testing_triples_factory=testing, - model='TransE', - ) - pipeline_result.save_to_directory('test_pre_stratified_transe') +>>> from pykeen.triples import TriplesFactory +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> training = TriplesFactory.from_path(NATIONS_TRAIN_PATH) +>>> testing = TriplesFactory.from_path( +... NATIONS_TEST_PATH, +... entity_to_id=training.entity_to_id, +... relation_to_id=training.relation_to_id, +... ) +>>> result = pipeline( +... training=training, +... 
testing=testing, +... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher +... ) +>>> result.save_to_directory('doctests/test_pre_stratified_transe') .. warning:: @@ -94,29 +90,26 @@ The ``dataset_kwargs`` argument is ignored when passing your own :class:`pykeen. sure to include the ``create_inverse_triples=True`` in the instantiation of those classes if that's your desired behavior as in: -.. code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.pipeline import pipeline - from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH - - training = TriplesFactory( - path=NATIONS_TRAIN_PATH, - create_inverse_triples=True, - ) - testing = TriplesFactory( - path=NATIONS_TEST_PATH, - entity_to_id=training.entity_to_id, - relation_to_id=training.relation_to_id, - create_inverse_triples=True, - ) - - result = pipeline( - training_triples_factory=training, - testing_triples_factory=testing, - model='TransE', - ) - result.save_to_directory('test_pre_stratified_transe') +>>> from pykeen.triples import TriplesFactory +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH +>>> training = TriplesFactory.from_path( +... NATIONS_TRAIN_PATH, +... create_inverse_triples=True, +... ) +>>> testing = TriplesFactory.from_path( +... NATIONS_TEST_PATH, +... entity_to_id=training.entity_to_id, +... relation_to_id=training.relation_to_id, +... create_inverse_triples=True, +... ) +>>> result = pipeline( +... training=training, +... testing=testing, +... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher +... ) +>>> result.save_to_directory('doctests/test_pre_stratified_transe') Triples factories can also be instantiated using the ``triples`` keyword argument instead of the ``path`` argument if you already have triples loaded in a :class:`numpy.ndarray`. 
@@ -127,38 +120,34 @@ It's more realistic your real-world dataset is not already stratified into train PyKEEN has you covered with :func:`pykeen.triples.TriplesFactory.split`, which will allow you to create a stratified dataset. -.. code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.pipeline import pipeline - from pykeen.datasets.nations import NATIONS_TRAIN_PATH - - tf = TriplesFactory(path=NATIONS_TRAIN_PATH) - training, testing = tf.split() - - result = pipeline( - training_triples_factory=training, - testing_triples_factory=testing, - model='TransE', - ) - pipeline_result.save_to_directory('test_unstratified_transe') +>>> from pykeen.triples import TriplesFactory +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH +>>> tf = TriplesFactory.from_path(NATIONS_TRAIN_PATH) +>>> training, testing = tf.split() +>>> result = pipeline( +... training=training, +... testing=testing, +... model='TransE', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher +... ) +>>> result.save_to_directory('doctests/test_unstratified_transe') By default, this is an 80/20 split. If you want to use early stopping, you'll also need a validation set, so you should specify the splits: -.. code-block:: python - - from pykeen.triples import TriplesFactory - from pykeen.pipeline import pipeline - - tf = TriplesFactory(path=...) 
- training, testing, validation = tf.split([.8, .1, .1]) - - result = pipeline( - training_triples_factory=training, - testing_triples_factory=testing, - validation_triples_factory=validation, - model='TransE', - stopper='early', - ) - pipeline_result.save_to_directory('test_unstratified_stopped_transe') +>>> from pykeen.triples import TriplesFactory +>>> from pykeen.pipeline import pipeline +>>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH +>>> tf = TriplesFactory.from_path(NATIONS_TRAIN_PATH) +>>> training, testing, validation = tf.split([.8, .1, .1]) +>>> result = pipeline( +... training=training, +... testing=testing, +... validation=validation, +... model='TransE', +... stopper='early', +... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go +... # higher, especially with early stopper enabled +... ) +>>> result.save_to_directory('doctests/test_unstratified_stopped_transe') From 8fea70f048feb4959dfd518e9cec1a784c153b8b Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 16:59:12 +0100 Subject: [PATCH 05/18] Add doctests to GHA Trigger CI --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_master.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 90ce0b5fbf..c67f555c44 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -84,7 +84,7 @@ jobs: - name: Run fast tests run: tox -e py - name: Run slow tests - run: tox -e integration + run: tox -e integration,doctests windows: if: "contains(github.event.head_commit.message, 'Trigger CI')" name: Windows diff --git a/.github/workflows/tests_master.yml b/.github/workflows/tests_master.yml index eb11b184bb..28a5828a17 100644 --- a/.github/workflows/tests_master.yml +++ b/.github/workflows/tests_master.yml @@ -83,7 +83,7 @@ jobs: - name: Run fast tests run: tox -e py - name: Run slow tests - run: tox -e integration + run: tox -e 
integration,doctests windows: if: "!contains(github.event.head_commit.message, 'skip ci')" name: Windows From 237ff6dffa355ea7af447f88df1135628f2d2543 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:00:18 +0100 Subject: [PATCH 06/18] Update tutorials --- docs/source/tutorial/checkpoints.rst | 223 +++++++++----------- docs/source/tutorial/making_predictions.rst | 55 ++--- tox.ini | 12 +- 3 files changed, 124 insertions(+), 166 deletions(-) diff --git a/docs/source/tutorial/checkpoints.rst b/docs/source/tutorial/checkpoints.rst index 157e0bfde7..3d30eb940d 100644 --- a/docs/source/tutorial/checkpoints.rst +++ b/docs/source/tutorial/checkpoints.rst @@ -17,55 +17,46 @@ Regular Checkpoints The tutorial :ref:`first_steps` showed how the :func:`pykeen.pipeline.pipeline` function can be used to set up an entire KGEM for training and evaluation in just two lines of code. A slightly extended example is shown below: -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=1000, - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=1000, +... ), +... ) To enable checkpoints, all you have to do is add a ``checkpoint_name`` argument to the ``training_kwargs``. This argument should have the name you would like the checkpoint files saved on your computer to be called. -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=1000, - checkpoint_name='my_checkpoint.pt', - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... 
num_epochs=1000, +... checkpoint_name='my_checkpoint.pt', +... ), +... ) Furthermore, you can set the checkpoint frequency, i.e. how often checkpoints should be saved given in minutes, by setting the argument ``checkpoint_frequency`` with an integer. The default frequency is 30 minutes and setting it to ``0`` will cause the training loop to save a checkpoint after each epoch. Let's look at an example. -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=1000, - checkpoint_name='my_checkpoint.pt', - checkpoint_frequency=5, - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=1000, +... checkpoint_name='my_checkpoint.pt', +... checkpoint_frequency=5, +... ), +... ) Here we have defined a pipeline that will save training loop checkpoints in the checkpoint file called ``my_checkpoint.pt`` every time an epoch finishes and at least `5` minutes have passed since saving previously. @@ -78,20 +69,17 @@ or the early stopper stops it. Assuming that you successfully trained the KGEM a that you would like to test the model with `2000` epochs, all you have to do is to change the number of epochs and execute the code like: -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=2000, # more epochs than before - checkpoint_name='my_checkpoint.pt', - checkpoint_frequency=5, - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=2000, # more epochs than before +... checkpoint_name='my_checkpoint.pt', +... checkpoint_frequency=5, +... 
), +... ) The above code will load the saved state after finishing `1000` epochs and continue to train to `2000` epochs, giving the exact same results as if you would have run it for `2000` epochs in the first place. @@ -101,20 +89,17 @@ which is a subdirectory in your home directory, e.g. ``~/.data/pykeen/checkpoint Optionally, you can set the path to where you want the checkpoints to be saved by setting the ``checkpoint_directory`` argument with a string or a :class:`pathlib.Path` object containing your desired root path, as shown in this example: -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=2000, - checkpoint_name='my_checkpoint.pt', - checkpoint_directory='/my/secret/dir', - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=2000, +... checkpoint_name='my_checkpoint.pt', +... checkpoint_directory='doctests/checkpoint_dir', +... ), +... ) .. _failure_checkpoints_how_to: @@ -123,16 +108,16 @@ Checkpoints on Failure In cases where you only would like to save checkpoints whenever the training loop might fail, you can use the argument ``checkpoint_on_failure=True``, like: -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict(num_epochs=2000, checkpoint_on_failure=True), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=2000, +... checkpoint_on_failure=True, +... ), +... ) This option differs from regular checkpoints, since regular checkpoints are only saved after a successful epoch. 
When saving checkpoints due to failure of the training loop there is no guarantee that all @@ -141,19 +126,17 @@ specific training loop. Therefore, these checkpoints are saved with a distinct c ``PyKEEN_just_saved_my_day_{datetime}.pt`` in the given ``checkpoint_directory``, even when you also opted to use regular checkpoints as defined above, e.g. with this code: -.. code-block:: python - - from pykeen.pipeline import pipeline - pipeline_result = pipeline( - dataset='Nations', - model='TransE', - optimizer='Adam', - training_kwargs=dict( - num_epochs=2000, - checkpoint_name='my_checkpoint.pt', - checkpoint_on_failure=True, - ), - ) +>>> from pykeen.pipeline import pipeline +>>> pipeline_result = pipeline( +... dataset='Nations', +... model='TransE', +... optimizer='Adam', +... training_kwargs=dict( +... num_epochs=2000, +... checkpoint_name='my_checkpoint.pt', +... checkpoint_on_failure=True, +... ), +... ) Note: Use this argument with caution, since every failed training loop will create a distinct checkpoint file. @@ -195,19 +178,17 @@ To show how to use the checkpoint functionality without the pipeline, we define .. code-block:: python - from pykeen.models import TransE - from pykeen.training import SLCWATrainingLoop - from pykeen.triples import TriplesFactory - from torch.optim import Adam - - triples_factory = Nations().training - model = TransE( - triples_factory=triples_factory, - random_seed=123, - ) - - optimizer = Adam(params=model.get_grad_params()) - training_loop = SLCWATrainingLoop(model=model, optimizer=optimizer) +>>> from pykeen.models import TransE +>>> from pykeen.training import SLCWATrainingLoop +>>> from pykeen.triples import TriplesFactory +>>> from torch.optim import Adam +>>> triples_factory = Nations().training +>>> model = TransE( +... triples_factory=triples_factory, +... random_seed=123, +... 
) +>>> optimizer = Adam(params=model.get_grad_params()) +>>> training_loop = SLCWATrainingLoop(model=model, optimizer=optimizer) At this point we have a model, dataset and optimizer all setup in a training loop and are ready to train the model with the ``training_loop``'s method :func:`pykeen.training.TrainingLoop.train`. To enable checkpoints all you have to do is @@ -222,13 +203,11 @@ argument with a string or a :class:`pathlib.Path` object containing your desired Here is an example: -.. code-block:: python - - losses = training_loop.train( - num_epochs=1000, - checkpoint_name='my_checkpoint.pt', - checkpoint_frequency=5, - ) +>>> losses = training_loop.train( +... num_epochs=1000, +... checkpoint_name='my_checkpoint.pt', +... checkpoint_frequency=5, +... ) With this code we have started the training loop with the above defined KGEM. The training loop will save a checkpoint in the ``my_checkpoint.pt`` file, which will be saved in the ``~/.data/pykeen/checkpoints/`` directory, since we haven't @@ -249,26 +228,22 @@ E.g. the above training loop finished successfully after 1000 epochs, but you wo train the same model from that state for 2000 epochs. All you have have to do is to change the argument ``num_epochs`` in the above code to: -.. code-block:: python - - losses = training_loop.train( - num_epochs=2000, - checkpoint_name='my_checkpoint.pt', - checkpoint_frequency=5, - ) +>>> losses = training_loop.train( +... num_epochs=2000, +... checkpoint_name='my_checkpoint.pt', +... checkpoint_frequency=5, +... ) and now the training loop will resume from the state at 1000 epochs and continue to train until 2000 epochs. As shown in :ref:`failure_checkpoints_how_to`, you can also save checkpoints only in cases where the training loop fails. To do this you just have to set the argument `checkpoint_on_failure=True`, like: -.. 
code-block:: python - - losses = training_loop.train( - num_epochs=2000, - checkpoint_directory='/my/secret/dir', - checkpoint_on_failure=True, - ) +>>> losses = training_loop.train( +... num_epochs=2000, +... checkpoint_directory='/my/secret/dir', +... checkpoint_on_failure=True, +... ) This code will save a checkpoint in case the training loop fails. Note how we also chose a new checkpoint directory by setting the `checkpoint_directory` argument to ``/my/secret/dir``. diff --git a/docs/source/tutorial/making_predictions.rst b/docs/source/tutorial/making_predictions.rst index 3e10337107..5744984674 100644 --- a/docs/source/tutorial/making_predictions.rst +++ b/docs/source/tutorial/making_predictions.rst @@ -26,30 +26,22 @@ This example shows using the :func:`pykeen.pipeline.pipeline` to train a model which will already be in memory. Each of the high-level interfaces are exposed through the model: -.. code-block:: python - - from pykeen.pipeline import pipeline - - pipeline_result = pipeline(dataset='Nations', model='RotatE') - model = pipeline_result.model - - # Predict tails - predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs') - - # Predict relations - predicted_relations_df = model.get_relation_prediction_df('brazil', 'uk') - - # Predict heads - predicted_heads_df = model.get_head_prediction_df('conferences', 'brazil') - - # Score all triples (memory intensive) - predictions_df = model.get_all_prediction_df() - - # Score top K triples - predictions_df = model.get_all_prediction_df(k=150) - - # save the model - pipeline_result.save_to_directory('nations_rotate') +>>> from pykeen.pipeline import pipeline +>>> # Run the pipeline +>>> pipeline_result = pipeline(dataset='Nations', model='RotatE') +>>> model = pipeline_result.model +>>> # Predict tails +>>> predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs') +>>> # Predict relations +>>> predicted_relations_df = model.get_relation_prediction_df('brazil', 'uk') +>>> # 
Predict heads +>>> predicted_heads_df = model.get_head_prediction_df('conferences', 'brazil') +>>> # Score all triples (memory intensive) +>>> predictions_df = model.get_all_prediction_df() +>>> # Score top K triples +>>> top_k_predictions_df = model.get_all_prediction_df(k=150) +>>> # save the model +>>> pipeline_result.save_to_directory('doctests/nations_rotate') Loading a Model ~~~~~~~~~~~~~~~ @@ -58,16 +50,11 @@ This example shows how to reload a previously trained model. The a file named ``trained_model.pkl``, so we will use the one from the previous example. -.. code-block:: python - - import torch - - model = torch.load('nations_rotate/trained_model.pkl') - - # Predict tails - predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs') - - # everything else is the same as above +>>> import torch +>>> model = torch.load('doctests/nations_rotate/trained_model.pkl') +>>> # Predict tails +>>> predicted_tails_df = model.get_tail_prediction_df('brazil', 'intergovorgs') +>>> # everything else is the same as above There's an example model available at https://github.com/pykeen/pykeen/blob/master/notebooks/hello_world/nations_transe/trained_model.pkl diff --git a/tox.ini b/tox.ini index 3cb2d0cef4..a1b7b76d99 100644 --- a/tox.ini +++ b/tox.ini @@ -53,15 +53,11 @@ extras = [testenv:doctests] commands = # TODO make this automatic for all RST in a loop (but not using xargs since doctest uses multiprocessing) + python -m doctest docs/source/tutorial/first_steps.rst python -m doctest docs/source/tutorial/byod.rst - #python -m doctest docs/source/tutorial/checkpoints.rst - #python -m doctest docs/source/tutorial/first_steps.rst - #python -m doctest docs/source/tutorial/making_predictions.rst - #python -m doctest docs/source/tutorial/performance.rst - #python -m doctest docs/source/tutorial/running_ablation.rst - #python -m doctest docs/source/tutorial/running_hpo.rst - #python -m doctest docs/source/tutorial/translational_toy_example.rst - #python -m 
doctest docs/source/tutorial/understanding_evaluation.rst + python -m doctest docs/source/tutorial/making_predictions.rst + # python -m doctest src/pykeen/pipeline.py + # python -m doctest src/pykeen/hpo/__init__.py [testenv:coverage-clean] deps = coverage From 7846d6927423bfae09841805a7eadbfe7d384646 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:05:54 +0100 Subject: [PATCH 07/18] Pass mypy --- src/pykeen/datasets/base.py | 4 ++-- src/pykeen/datasets/dbpedia.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/pykeen/datasets/base.py b/src/pykeen/datasets/base.py index 61080fd93c..ec368d09ed 100644 --- a/src/pykeen/datasets/base.py +++ b/src/pykeen/datasets/base.py @@ -177,7 +177,7 @@ def testing(self) -> TriplesFactory: # type:ignore # noqa: D401 return self._testing @property - def validation(self) -> TriplesFactory: # type:ignore # noqa: D401 + def validation(self) -> Optional[TriplesFactory]: # type:ignore # noqa: D401 """The validation triples factory that shares indices with the training triples factory.""" if not self._loaded: self._load() @@ -223,7 +223,7 @@ def __init__( self, training_path: Union[str, TextIO], testing_path: Union[str, TextIO], - validation_path: Union[str, TextIO], + validation_path: Union[None, str, TextIO], eager: bool = False, create_inverse_triples: bool = False, load_triples_kwargs: Optional[Mapping[str, Any]] = None, diff --git a/src/pykeen/datasets/dbpedia.py b/src/pykeen/datasets/dbpedia.py index d508d9154f..404620519d 100644 --- a/src/pykeen/datasets/dbpedia.py +++ b/src/pykeen/datasets/dbpedia.py @@ -45,8 +45,4 @@ def __init__(self, create_inverse_triples: bool = False, **kwargs): if __name__ == '__main__': - _d = DBpedia50() - _d.summarize() - print(_d.training.triples[:5]) - print(_d.testing.triples[:5]) - print(_d.validation.triples[:5]) + DBpedia50().summarize() From 76c1b4d0e960c1266244c3b738bca461b0f08578 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 
12 Feb 2021 20:09:32 +0100 Subject: [PATCH 08/18] Update README acknowledgement --- README.md | 2 +- src/pykeen/templates/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6697258ba8..c58fd6fc27 100644 --- a/README.md +++ b/README.md @@ -300,7 +300,7 @@ See [CONTRIBUTING.md](/CONTRIBUTING.md) for more information on getting involved This project has been supported by several organizations (in alphabetical order): - [Bayer](https://www.bayer.com/) -- [Enveda Therapeutics](https://envedatherapeutics.com/) +- [Enveda Biosciences](https://www.envedabio.com/) - [Fraunhofer Institute for Algorithms and Scientific Computing](https://www.scai.fraunhofer.de) - [Fraunhofer Institute for Intelligent Analysis and Information Systems](https://www.iais.fraunhofer.de) - [Fraunhofer Center for Machine Learning](https://www.cit.fraunhofer.de/de/zentren/maschinelles-lernen.html) diff --git a/src/pykeen/templates/README.md b/src/pykeen/templates/README.md index 42fdd76b4e..155b0820eb 100644 --- a/src/pykeen/templates/README.md +++ b/src/pykeen/templates/README.md @@ -202,7 +202,7 @@ See [CONTRIBUTING.md](/CONTRIBUTING.md) for more information on getting involved This project has been supported by several organizations (in alphabetical order): - [Bayer](https://www.bayer.com/) -- [Enveda Therapeutics](https://envedatherapeutics.com/) +- [Enveda Biosciences](https://www.envedabio.com/) - [Fraunhofer Institute for Algorithms and Scientific Computing](https://www.scai.fraunhofer.de) - [Fraunhofer Institute for Intelligent Analysis and Information Systems](https://www.iais.fraunhofer.de) - [Fraunhofer Center for Machine Learning](https://www.cit.fraunhofer.de/de/zentren/maschinelles-lernen.html) From 2d616872634d4223b22a30e986bd3a8fdc858006 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:09:37 +0100 Subject: [PATCH 09/18] Update license year --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/LICENSE b/LICENSE index 81380cad04..f48adb8aad 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2020 PyKEEN Project Team +Copyright (c) 2019-2021 PyKEEN Project Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From cd65862ad1377e728de210dbdf51d1a099fd7c31 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:09:48 +0100 Subject: [PATCH 10/18] Update AUTHORS.md Add link to GitHub authors Trigger CI --- AUTHORS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index e8a1ae4295..b846ecf502 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -16,3 +16,5 @@ - [Michael Galkin](https://github.com/migalkin) - [Felix Hamann](https://github.com/kantholtz) - [Sankranti Joshi](https://github.com/sunny1401) + +See also: https://github.com/pykeen/pykeen/graphs/contributors From 1d28264ec37ba5e6fb260391506549c4cbb053ac Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:18:17 +0100 Subject: [PATCH 11/18] Update typing.py Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com> --- src/pykeen/typing.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py index 0f06dd7cc0..d205003a16 100644 --- a/src/pykeen/typing.py +++ b/src/pykeen/typing.py @@ -17,6 +17,7 @@ 'MappedTriples', 'EntityMapping', 'RelationMapping', + # Tensor Functions 'Initializer', 'Normalizer', 'Constrainer', @@ -24,6 +25,8 @@ 'InteractionFunction', 'DeviceHint', 'TorchRandomHint', + # Tensors + 'TensorType', 'HeadRepresentation', 'RelationRepresentation', 'TailRepresentation', @@ -34,6 +37,7 @@ X = TypeVar('X') Hint = Union[None, str, X] +#: A function that mutates the input and returns a new object of the same type as output Mutation = Callable[[X], X] OneOrSequence = Union[X, 
Sequence[X]] @@ -42,12 +46,17 @@ EntityMapping = Mapping[str, int] RelationMapping = Mapping[str, int] -# comment: TypeVar expects none, or at least two super-classes +#: A type variable bound by :class:`torch.Tensor` or :class:`torch.FloatTensor`. TensorType = TypeVar("TensorType", torch.Tensor, torch.FloatTensor) +#: An interaction function takes in head tensor(s), relation tensor(s), and tail tensor(s) +#: then returns a score InteractionFunction = Callable[[TensorType, TensorType, TensorType], TensorType] +#: A function that can be applied to a tensor to initialize it Initializer = Mutation[TensorType] +#: A function that can be applied to a tensor to normalize it Normalizer = Mutation[TensorType] +#: A function that can be applied to a tensor to constrain it Constrainer = Mutation[TensorType] @@ -64,7 +73,11 @@ def cast_constrainer(f) -> Constrainer: #: A type variable for head representations used in :class:`pykeen.models.Model`, #: :class:`pykeen.nn.modules.Interaction`, etc. HeadRepresentation = TypeVar("HeadRepresentation", bound=OneOrSequence[torch.FloatTensor]) +#: A type variable for relation representations used in :class:`pykeen.models.Model`, +#: :class:`pykeen.nn.modules.Interaction`, etc. RelationRepresentation = TypeVar("RelationRepresentation", bound=OneOrSequence[torch.FloatTensor]) +#: A type variable for tail representations used in :class:`pykeen.models.Model`, +#: :class:`pykeen.nn.modules.Interaction`, etc. 
TailRepresentation = TypeVar("TailRepresentation", bound=OneOrSequence[torch.FloatTensor]) From 9910025e18624276f973a6205455a8362baed533 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:25:51 +0100 Subject: [PATCH 12/18] Update typing.py Trigger CI Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com> --- src/pykeen/typing.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/pykeen/typing.py b/src/pykeen/typing.py index d205003a16..d2d0b09159 100644 --- a/src/pykeen/typing.py +++ b/src/pykeen/typing.py @@ -12,21 +12,20 @@ 'Hint', 'Mutation', 'OneOrSequence', - # Others + # Triples 'LabeledTriples', 'MappedTriples', 'EntityMapping', 'RelationMapping', + # Others + 'DeviceHint', + 'TorchRandomHint', # Tensor Functions 'Initializer', 'Normalizer', 'Constrainer', 'cast_constrainer', - 'InteractionFunction', - 'DeviceHint', - 'TorchRandomHint', # Tensors - 'TensorType', 'HeadRepresentation', 'RelationRepresentation', 'TailRepresentation', @@ -46,18 +45,12 @@ EntityMapping = Mapping[str, int] RelationMapping = Mapping[str, int] -#: A type variable bound by :class:`torch.Tensor` or :class:`torch.FloatTensor`. 
-TensorType = TypeVar("TensorType", torch.Tensor, torch.FloatTensor) -#: An interaction function takes in head tensor(s), relation tensor(s), and tail tensor(s) -#: then returns a score -InteractionFunction = Callable[[TensorType, TensorType, TensorType], TensorType] - #: A function that can be applied to a tensor to initialize it -Initializer = Mutation[TensorType] +Initializer = Mutation[torch.FloatTensor] #: A function that can be applied to a tensor to normalize it -Normalizer = Mutation[TensorType] +Normalizer = Mutation[torch.FloatTensor] #: A function that can be applied to a tensor to constrain it -Constrainer = Mutation[TensorType] +Constrainer = Mutation[torch.FloatTensor] def cast_constrainer(f) -> Constrainer: From 15a635585e691dc054fd234c8c894bff5b3f998e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 20:29:45 +0100 Subject: [PATCH 13/18] Update checkpoints.rst Trigger CI Co-Authored-By: Stephen Bonner <10208489+sbonner0@users.noreply.github.com> --- docs/source/tutorial/checkpoints.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/tutorial/checkpoints.rst b/docs/source/tutorial/checkpoints.rst index 3d30eb940d..cfa550020e 100644 --- a/docs/source/tutorial/checkpoints.rst +++ b/docs/source/tutorial/checkpoints.rst @@ -176,8 +176,6 @@ the same compared to running uninterrupted without checkpoints, also for the eva To show how to use the checkpoint functionality without the pipeline, we define a KGEM first: -.. 
code-block:: python - >>> from pykeen.models import TransE >>> from pykeen.training import SLCWATrainingLoop >>> from pykeen.triples import TriplesFactory From ca2eaff0a370998e172334a8812c204a1190df54 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 22:29:34 +0100 Subject: [PATCH 14/18] Split out doctests Trigger CI --- .github/workflows/tests.yml | 4 +++- .github/workflows/tests_master.yml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c67f555c44..d6a864ded9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -84,7 +84,9 @@ jobs: - name: Run fast tests run: tox -e py - name: Run slow tests - run: tox -e integration,doctests + run: tox -e integration + - name: Run doctests + run: tox -e doctests windows: if: "contains(github.event.head_commit.message, 'Trigger CI')" name: Windows diff --git a/.github/workflows/tests_master.yml b/.github/workflows/tests_master.yml index 28a5828a17..9719fa7a33 100644 --- a/.github/workflows/tests_master.yml +++ b/.github/workflows/tests_master.yml @@ -83,7 +83,9 @@ jobs: - name: Run fast tests run: tox -e py - name: Run slow tests - run: tox -e integration,doctests + run: tox -e integration + - name: Run doctests + run: tox -e doctests windows: if: "!contains(github.event.head_commit.message, 'skip ci')" name: Windows From 13e8fa7c53baba9884ee97cf642727edc955b047 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 22:55:40 +0100 Subject: [PATCH 15/18] =?UTF-8?q?Bump=20version:=201.2.0-dev=20=E2=86=92?= =?UTF-8?q?=201.2.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- docs/source/conf.py | 2 +- src/pykeen/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5022316fa8..0df5d6139f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ 
-1,5 +1,5 @@ [bumpversion] -current_version = 1.2.0-dev +current_version = 1.2.0 commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/docs/source/conf.py b/docs/source/conf.py index 39106fb3be..17e16ea185 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ author = 'PyKEEN Project Team' # The full version, including alpha/beta/rc tags. -release = '1.2.0-dev' +release = '1.2.0' # The short X.Y version. parsed_version = re.match( diff --git a/src/pykeen/version.py b/src/pykeen/version.py index 0a27740ef5..09325feb11 100644 --- a/src/pykeen/version.py +++ b/src/pykeen/version.py @@ -11,7 +11,7 @@ 'get_git_hash', ] -VERSION = '1.2.0-dev' +VERSION = '1.2.0' def get_git_hash() -> str: From 1963889b5997a14692c802967e676631d71fb256 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 22:56:14 +0100 Subject: [PATCH 16/18] =?UTF-8?q?Bump=20version:=201.2.0=20=E2=86=92=201.2?= =?UTF-8?q?.1-dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- docs/source/conf.py | 2 +- src/pykeen/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0df5d6139f..c5f3a17993 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.2.0 +current_version = 1.2.1-dev commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/docs/source/conf.py b/docs/source/conf.py index 17e16ea185..d3854dd035 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ author = 'PyKEEN Project Team' # The full version, including alpha/beta/rc tags. -release = '1.2.0' +release = '1.2.1-dev' # The short X.Y version. 
parsed_version = re.match( diff --git a/src/pykeen/version.py b/src/pykeen/version.py index 09325feb11..068ce5a0a9 100644 --- a/src/pykeen/version.py +++ b/src/pykeen/version.py @@ -11,7 +11,7 @@ 'get_git_hash', ] -VERSION = '1.2.0' +VERSION = '1.2.1-dev' def get_git_hash() -> str: From a9108f4b2b48b60509f3c58d5f98c2af15c62259 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Feb 2021 23:16:02 +0100 Subject: [PATCH 17/18] Bump versions Trigger CI following the previous release kerfuffle... --- .bumpversion.cfg | 2 +- docs/source/conf.py | 2 +- src/pykeen/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c5f3a17993..aa0ad94822 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.2.1-dev +current_version = 1.3.0-dev commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/docs/source/conf.py b/docs/source/conf.py index d3854dd035..0a7ca9f829 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ author = 'PyKEEN Project Team' # The full version, including alpha/beta/rc tags. -release = '1.2.1-dev' +release = '1.3.0-dev' # The short X.Y version. parsed_version = re.match( diff --git a/src/pykeen/version.py b/src/pykeen/version.py index 068ce5a0a9..a6ffa3c798 100644 --- a/src/pykeen/version.py +++ b/src/pykeen/version.py @@ -11,7 +11,7 @@ 'get_git_hash', ] -VERSION = '1.2.1-dev' +VERSION = '1.3.0-dev' def get_git_hash() -> str: From c9872498f246b6c0dd889283ada2ceb47e9e5e36 Mon Sep 17 00:00:00 2001 From: PyKEEN_bot Date: Fri, 12 Feb 2021 22:22:27 +0000 Subject: [PATCH 18/18] Trigger CI