🐦 🪢 Add more high-level arguments to the pipeline (pykeen#495)
* Add shortcut for setting epochs

* Enable shortcut for epochs on HPO and update docs

Trigger CI

* Mypy cleanup

Trigger CI

* Add high-level usage of TQDM

Trigger CI

* Trigger CI

Co-authored-by: PyKEEN_bot <pykeen2019@gmail.com>
cthoyt and PyKEEN-bot authored Jun 28, 2021
1 parent 8d2e04a commit 59bafd7
Showing 4 changed files with 32 additions and 13 deletions.
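Taken together, the changes add two high-level arguments to the pipeline: ``epochs``, a shortcut that fills the ``num_epochs`` key of ``training_kwargs``, and ``use_tqdm``, a single switch for progress bars in both training and evaluation. A minimal sketch of the new call style (dataset, model, and output path here are illustrative, not from the diff):

from pykeen.pipeline import pipeline

# Sketch of the new call style introduced by this commit.
# ``epochs`` fills training_kwargs['num_epochs']; ``use_tqdm`` toggles
# progress bars for both the training loop and evaluation.
result = pipeline(
    dataset='Nations',
    model='TransE',
    epochs=5,         # shortcut for training_kwargs=dict(num_epochs=5)
    use_tqdm=False,   # progress bars are on by default; this silences them
)
result.save_to_directory('doctests/overview_transe')  # illustrative path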
16 changes: 8 additions & 8 deletions docs/source/byo/data.rst
@@ -20,7 +20,7 @@ pipeline like this:
... training=NATIONS_TRAIN_PATH,
... testing=NATIONS_TEST_PATH,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -39,7 +39,7 @@ the :func:`pykeen.pipeline.pipeline` as in:
... testing=NATIONS_TEST_PATH,
... validation=NATIONS_VALIDATE_PATH,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_hpo_pre_stratified_transe')

@@ -56,7 +56,7 @@ to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse_triples``
... testing=NATIONS_TEST_PATH,
... dataset_kwargs={'create_inverse_triples': True},
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -76,7 +76,7 @@ TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface.
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -109,7 +109,7 @@ desired behavior as in:
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_pre_stratified_transe')

@@ -131,7 +131,7 @@ a stratified dataset.
... training=training,
... testing=testing,
... model='TransE',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go higher
+ ... epochs=5, # short epochs for testing - you should go higher
... )
>>> result.save_to_directory('doctests/test_unstratified_transe')

@@ -149,8 +149,8 @@ you should specify the splits:
... validation=validation,
... model='TransE',
... stopper='early',
- ... training_kwargs=dict(num_epochs=5), # short epochs for testing - you should go
- ... # higher, especially with early stopper enabled
+ ... epochs=5, # short epochs for testing - you should go
+ ... # higher, especially with early stopper enabled
... )
>>> result.save_to_directory('doctests/test_unstratified_stopped_transe')

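For scripts built on the bring-your-own-data examples above, the migration is mechanical; the two calls below are equivalent after this commit (the ``NATIONS_*`` path constants are the same ones these docs import, assumed here to come from ``pykeen.datasets.nations``):

from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH
from pykeen.pipeline import pipeline

# Old spelling, still supported:
result_old = pipeline(
    training=NATIONS_TRAIN_PATH,
    testing=NATIONS_TEST_PATH,
    model='TransE',
    training_kwargs=dict(num_epochs=5),
)

# New shortcut with identical effect:
result_new = pipeline(
    training=NATIONS_TRAIN_PATH,
    testing=NATIONS_TEST_PATH,
    model='TransE',
    epochs=5,
)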
14 changes: 10 additions & 4 deletions src/pykeen/hpo/hpo.py
@@ -9,7 +9,7 @@
import os
import pathlib
from dataclasses import dataclass
- from typing import Any, Collection, Dict, Mapping, Optional, Type, Union
+ from typing import Any, Callable, Collection, Dict, Mapping, Optional, Type, Union, cast

import torch
from optuna import Study, Trial, create_study
@@ -476,6 +476,7 @@ def hpo_pipeline(
negative_sampler_kwargs: Optional[Mapping[str, Any]] = None,
negative_sampler_kwargs_ranges: Optional[Mapping[str, Any]] = None,
# 7. Training
+ epochs: Optional[int] = None,
training_kwargs: Optional[Mapping[str, Any]] = None,
training_kwargs_ranges: Optional[Mapping[str, Any]] = None,
stopper: HintType[Stopper] = None,
@@ -580,6 +581,8 @@ def hpo_pipeline(
Strategies for optimizing the negative samplers' hyper-parameters to override
the defaults
+ :param epochs:
+ A shortcut for setting the ``num_epochs`` key in the ``training_kwargs`` dict.
:param training_kwargs:
Keyword arguments to pass to the training loop's train function on call
:param training_kwargs_ranges:
@@ -658,7 +661,7 @@ def hpo_pipeline(
if regularizer is not None:
regularizer_cls = regularizer_resolver.lookup(regularizer)
elif getattr(model_cls, 'regularizer_default', None):
- regularizer_cls = model_cls.regularizer_default
+ regularizer_cls = model_cls.regularizer_default # type:ignore
else:
regularizer_cls = None
if regularizer_cls:
@@ -687,6 +690,9 @@ def hpo_pipeline(
else:
negative_sampler_cls = None
# 7. Training
+ if epochs is not None:
+ training_kwargs = {} if training_kwargs is None else dict(training_kwargs)
+ training_kwargs['num_epochs'] = epochs
stopper_cls: Type[Stopper] = stopper_resolver.lookup(stopper)
if stopper_cls is EarlyStopper and training_kwargs_ranges and 'epochs' in training_kwargs_ranges:
raise ValueError('can not use early stopping while optimizing epochs')
@@ -762,7 +768,7 @@ def hpo_pipeline(

# Invoke optimization of the objective function.
study.optimize(
- objective,
+ cast(Callable[[Trial], float], objective),
n_trials=n_trials,
timeout=timeout,
n_jobs=n_jobs or 1,
@@ -861,7 +867,7 @@ def suggest_discrete_power_int(trial: Trial, name: str, low: int, high: int, base
if high <= low:
raise Exception(f"Upper bound {high} is not greater than lower bound {low}.")
choices = [base ** i for i in range(low, high + 1)]
- return trial.suggest_categorical(name=name, choices=choices)
+ return cast(int, trial.suggest_categorical(name=name, choices=choices))


def _set_study_dataset(
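The HPO entry point gains the same shortcut, and the guard in the diff means early stopping cannot be combined with an ``epochs`` entry in ``training_kwargs_ranges``. A sketch of the new usage (dataset, model, and trial count chosen arbitrarily):

from pykeen.hpo import hpo_pipeline

# Fix the epoch count for every HPO trial via the new shortcut;
# equivalent to passing training_kwargs=dict(num_epochs=20).
hpo_result = hpo_pipeline(
    dataset='Nations',
    model='TransE',
    epochs=20,
    n_trials=3,
)

For reference, the ``suggest_discrete_power_int`` helper touched above samples from ``[base**low, ..., base**high]``; with ``low=4, high=8, base=2`` the categorical choices are ``[16, 32, 64, 128, 256]``.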
2 changes: 1 addition & 1 deletion src/pykeen/models/predict.py
@@ -258,7 +258,7 @@ def get_all_prediction_df(
from pykeen.models.predict import get_all_prediction_df
# Train a model (quickly)
- result = pipeline(model='RotatE', dataset='Nations', training_kwargs=dict(num_epochs=5))
+ result = pipeline(model='RotatE', dataset='Nations', epochs=5)
model = result.model
# Get scores for *all* triples
13 changes: 13 additions & 0 deletions src/pykeen/pipeline/api.py
@@ -683,6 +683,7 @@ def pipeline( # noqa: C901
negative_sampler: HintType[NegativeSampler] = None,
negative_sampler_kwargs: Optional[Mapping[str, Any]] = None,
# 7. Training (ronaldo style)
+ epochs: Optional[int] = None,
training_kwargs: Optional[Mapping[str, Any]] = None,
stopper: HintType[Stopper] = None,
stopper_kwargs: Optional[Mapping[str, Any]] = None,
@@ -700,6 +701,7 @@ def pipeline( # noqa: C901
use_testing_data: bool = True,
evaluation_fallback: bool = False,
filter_validation_when_testing: bool = True,
+ use_tqdm: Optional[bool] = None,
) -> PipelineResult:
"""Train and evaluate a model.
@@ -773,6 +775,8 @@ def pipeline( # noqa: C901
:param negative_sampler_kwargs:
Keyword arguments to pass to the negative sampler class on instantiation
+ :param epochs:
+ A shortcut for setting the ``num_epochs`` key in the ``training_kwargs`` dict.
:param training_kwargs:
Keyword arguments to pass to the training loop's train function on call
:param stopper:
@@ -811,6 +815,9 @@ def pipeline( # noqa: C901
model using the pipeline and evaluating with the testing set, but never using the validation set for
optimization at all. This is a very atypical scenario, so it is left as true by default to promote
comparability to previous publications.
+ :param use_tqdm:
+ Globally set the usage of tqdm progress bars. Typically more useful to set to false, since the training
+ loop and evaluation have it turned on by default.
:returns: A pipeline result package.
@@ -1019,6 +1026,10 @@ def pipeline( # noqa: C901
**stopper_kwargs,
)

+ if epochs is not None:
+ training_kwargs['num_epochs'] = epochs
+ if use_tqdm is not None:
+ training_kwargs['use_tqdm'] = use_tqdm
training_kwargs.setdefault('num_epochs', 5)
training_kwargs.setdefault('batch_size', 256)
_result_tracker.log_params(params=training_kwargs, prefix='training')
@@ -1116,6 +1127,8 @@ def pipeline( # noqa: C901
if evaluator_instance.batch_size is not None or evaluator_instance.slice_size is not None:
evaluation_kwargs['batch_size'] = evaluator_instance.batch_size
evaluation_kwargs['slice_size'] = evaluator_instance.slice_size
+ if use_tqdm is not None:
+ evaluation_kwargs['use_tqdm'] = use_tqdm
# Add logging about evaluator for debugging
logging.debug("Evaluation will be run with following parameters:")
logging.debug(f"evaluation_kwargs: {evaluation_kwargs}")
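Note the ordering in the diff: ``epochs`` is written into ``training_kwargs`` after the caller's dict is read and before the ``setdefault`` calls apply, so it takes precedence over an explicit ``num_epochs`` entry. A sketch of that precedence (values are arbitrary):

from pykeen.pipeline import pipeline

# ``epochs`` overwrites any ``num_epochs`` already in training_kwargs,
# so this model trains for 10 epochs, not 5; batch_size=128 is kept.
result = pipeline(
    dataset='Nations',
    model='TransE',
    training_kwargs=dict(num_epochs=5, batch_size=128),
    epochs=10,
)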
