🐦 🪢 Add more high-level arguments to the pipeline (pykeen#495)
* Add shortcut for setting epochs

* Enable shortcut for epochs on HPO and update docs

Trigger CI

* Mypy cleanup

Trigger CI

* Add high-level usage of TQDM

Trigger CI

* Trigger CI

Co-authored-by: PyKEEN_bot <pykeen2019@gmail.com>
cthoyt and PyKEEN-bot authored Jun 28, 2021
1 parent 8d2e04a commit 59bafd7
Showing 4 changed files with 32 additions and 13 deletions.
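Taken together, the changes add two high-level arguments to the pipeline: ``epochs``, a shortcut that fills the ``num_epochs`` key of ``training_kwargs``, and ``use_tqdm``, a single switch for progress bars in both training and evaluation. A minimal sketch of the new call style (dataset, model, and output path here are illustrative, not from the diff):

from pykeen.pipeline import pipeline

# Sketch of the new call style introduced by this commit.
# ``epochs`` fills training_kwargs['num_epochs']; ``use_tqdm`` toggles
# progress bars for both the training loop and evaluation.
result = pipeline(
    dataset='Nations',
    model='TransE',
    epochs=5,         # shortcut for training_kwargs=dict(num_epochs=5)
    use_tqdm=False,   # progress bars are on by default; this silences them
)
result.save_to_directory('doctests/overview_transe')  # illustrative path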
16 changes: 8 additions & 8 deletions docs/source/byo/data.rst
@@ -20,7 +20,7 @@ pipeline like this:
... training=NATIONS_TRAIN_PATH,
... testing=NATIONS_TEST_PATH,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -39,7 +39,7 @@ the :func:`pykeen.pipeline.pipeline` as in:
... testing=NATIONS_TEST_PATH,
... validation=NATIONS_VALIDATE_PATH,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_hpo_pre_stratified_transe')

@@ -56,7 +56,7 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse_triples``
... testing=NATIONS_TEST_PATH,
... dataset_kwargs={'create_inverse_triples': True},
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -76,7 +76,7 @@ TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface.
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -109,7 +109,7 @@ desired behavior as in:
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -131,7 +131,7 @@ a stratified dataset.
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_unstratified_transe')

@@ -149,8 +149,8 @@ you should specify the splits:
... validation=validation,
... model='TransE',
... stopper='early',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go
- ... # higher, especially with early stopper enabled
+ ... epochs=5, # short epochs for testing - you should go
+ ... # higher, especially with early stopper enabled
... )
>>> result.save_to_directory('doctests/test_unstratified_stopped_transe')

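For scripts built on the bring-your-own-data examples above, the migration is mechanical; the two calls below are equivalent after this commit (the ``NATIONS_*`` path constants are the same ones these docs import, assumed here to come from ``pykeen.datasets.nations``):

from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
from pykeen.pipeline import pipeline

# Old spelling, still supported:
result_old = pipeline(
    training=NATIONS_TRAIN_PATH,
    testing=NATIONS_TEST_PATH,
    model='TransE',
    training_kwargs=dict(num_epochs=5),
)

# New shortcut with identical effect:
result_new = pipeline(
    training=NATIONS_TRAIN_PATH,
    testing=NATIONS_TEST_PATH,
    model='TransE',
    epochs=5,
)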
14 changes: 10 additions & 4 deletions src/pykeen/hpo/hpo.py
@@ -9,7 +9,7 @@
import os
import pathlib
from dataclasses import dataclass
- from typing import Any, Collection, Dict, Mapping, Optional, Type, Union
+ from typing import Any, Callable, Collection, Dict, Mapping, Optional, Type, Union, cast

import torch
from optuna import Study, Trial, create_study
@@ -476,6 +476,7 @@ def hpo_pipeline(
negative_sampler_kwargs: Optional[Mapping[str, Any]] = None,
negative_sampler_kwargs_ranges: Optional[Mapping[str, Any]] = None,
# 7. Training
+ epochs: Optional[int] = None,
training_kwargs: Optional[Mapping[str, Any]] = None,
training_kwargs_ranges: Optional[Mapping[str, Any]] = None,
stopper: HintType[Stopper] = None,
@@ -580,6 +581,8 @@ def hpo_pipeline(
Strategies for optimizing the negative samplers' hyper-parameters to override
the defaults
+ :param epochs:
+ A shortcut for setting the ``num_epochs`` key in the ``training_kwargs`` dict.
:param training_kwargs:
Keyword arguments to pass to the training loop's train function on call
:param training_kwargs_ranges:
@@ -658,7 +661,7 @@ def hpo_pipeline(
if regularizer is not None:
regularizer_cls = regularizer_resolver.lookup(regularizer)
elif getattr(model_cls, 'regularizer_default', None):
- regularizer_cls = model_cls.regularizer_default
+ regularizer_cls = model_cls.regularizer_default # type:ignore
else:
regularizer_cls = None
if regularizer_cls:
@@ -687,6 +690,9 @@ def hpo_pipeline(
else:
negative_sampler_cls = None
# 7. Training
+ if epochs is not None:
+ training_kwargs = {} if training_kwargs is None else dict(training_kwargs)
+ training_kwargs['num_epochs'] = epochs
stopper_cls: Type[Stopper] = stopper_resolver.lookup(stopper)
if stopper_cls is EarlyStopper and training_kwargs_ranges and 'epochs' in training_kwargs_ranges:
raise ValueError('can not use early stopping while optimizing epochs')
@@ -762,7 +768,7 @@ def hpo_pipeline(

# Invoke optimization of the objective function.
study.optimize(
- objective,
+ cast(Callable[[Trial], float], objective),
n_trials=n_trials,
timeout=timeout,
n_jobs=n_jobs or 1,
@@ -861,7 +867,7 @@ def suggest_discrete_power_int(trial: Trial, name: str, low: int, high: int, base
if high <= low:
raise Exception(f"Upper bound {high} is not greater than lower bound {low}.")
choices = [base ** i for i in range(low, high + 1)]
- return trial.suggest_categorical(name=name, choices=choices)
+ return cast(int, trial.suggest_categorical(name=name, choices=choices))


def _set_study_dataset(
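The HPO entry point gains the same shortcut, and the guard in the diff means early stopping cannot be combined with an ``epochs`` entry in ``training_kwargs_ranges``. A sketch of the new usage (dataset, model, and trial count chosen arbitrarily):

from pykeen.hpo import hpo_pipeline

# Fix the epoch count for every HPO trial via the new shortcut;
# equivalent to passing training_kwargs=dict(num_epochs=20).
hpo_result = hpo_pipeline(
    dataset='Nations',
    model='TransE',
    epochs=20,
    n_trials=3,
)

For reference, the ``suggest_discrete_power_int`` helper touched above samples from ``[base**low, ..., base**high]``; with ``low=4, high=8, base=2`` the categorical choices are ``[16, 32, 64, 128, 256]``.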
2 changes: 1 addition & 1 deletion src/pykeen/models/predict.py
@@ -258,7 +258,7 @@ def get_all_prediction_df(
from pykeen.models.predict import get_all_prediction_df
# Train a model (quickly)
- result = pipeline(model='RotatE', dataset='Nations', training_kwargs=dict(num_epochs=5))
+ result = pipeline(model='RotatE', dataset='Nations', epochs=5)
model = result.model
# Get scores for *all* triples
13 changes: 13 additions & 0 deletions src/pykeen/pipeline/api.py
@@ -683,6 +683,7 @@ def pipeline( # noqa: C901
negative_sampler: HintType[NegativeSampler] = None,
negative_sampler_kwargs: Optional[Mapping[str, Any]] = None,
# 7. Training (ronaldo style)
+ epochs: Optional[int] = None,
training_kwargs: Optional[Mapping[str, Any]] = None,
stopper: HintType[Stopper] = None,
stopper_kwargs: Optional[Mapping[str, Any]] = None,
@@ -700,6 +701,7 @@ def pipeline( # noqa: C901
use_testing_data: bool = True,
evaluation_fallback: bool = False,
filter_validation_when_testing: bool = True,
+ use_tqdm: Optional[bool] = None,
) -> PipelineResult:
"""Train and evaluate a model.
@@ -773,6 +775,8 @@ def pipeline( # noqa: C901
:param negative_sampler_kwargs:
Keyword arguments to pass to the negative sampler class on instantiation
+ :param epochs:
+ A shortcut for setting the ``num_epochs`` key in the ``training_kwargs`` dict.
:param training_kwargs:
Keyword arguments to pass to the training loop's train function on call
:param stopper:
@@ -811,6 +815,9 @@ def pipeline( # noqa: C901
model using the pipeline and evaluating with the testing set, but never using the validation set for
optimization at all. This is a very atypical scenario, so it is left as true by default to promote
comparability to previous publications.
+ :param use_tqdm:
+ Globally set the usage of tqdm progress bars. Typically more useful to set to false, since the training
+ loop and evaluation have it turned on by default.
:returns: A pipeline result package.
@@ -1019,6 +1026,10 @@ def pipeline( # noqa: C901
**stopper_kwargs,
)

+ if epochs is not None:
+ training_kwargs['num_epochs'] = epochs
+ if use_tqdm is not None:
+ training_kwargs['use_tqdm'] = use_tqdm
training_kwargs.setdefault('num_epochs', 5)
training_kwargs.setdefault('batch_size', 256)
_result_tracker.log_params(params=training_kwargs, prefix='training')
@@ -1116,6 +1127,8 @@ def pipeline( # noqa: C901
if evaluator_instance.batch_size is not None or evaluator_instance.slice_size is not None:
evaluation_kwargs['batch_size'] = evaluator_instance.batch_size
evaluation_kwargs['slice_size'] = evaluator_instance.slice_size
+ if use_tqdm is not None:
+ evaluation_kwargs['use_tqdm'] = use_tqdm
# Add logging about evaluator for debugging
logging.debug("Evaluation will be run with following parameters:")
logging.debug(f"evaluation_kwargs: {evaluation_kwargs}")
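Note the ordering in the diff: ``epochs`` is written into ``training_kwargs`` after the caller's dict is read and before the ``setdefault`` calls apply, so it takes precedence over an explicit ``num_epochs`` entry. A sketch of that precedence (values are arbitrary):

from pykeen.pipeline import pipeline

# ``epochs`` overwrites any ``num_epochs`` already in training_kwargs,
# so this model trains for 10 epochs, not 5; batch_size=128 is kept.
result = pipeline(
    dataset='Nations',
    model='TransE',
    training_kwargs=dict(num_epochs=5, batch_size=128),
    epochs=10,
)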
