Skip to content

Commit

Permalink
📚🛗 Update Prediction Migration Guide (pykeen#1233)
Browse files Browse the repository at this point in the history
- [x] fix RST lists in `pykeen.predict` module docstring
- [x] extend the migration documentation to include an example for a
method which was already deprecated in v1.9

Also updates a few minor things:
- [x] fix typo in tutorial title
- [x] update pystow's deprecated `.submodule` (removed in v0.5) to
`.module`
- [x] update code formatting to be compliant with up-to-date `black`

---------

Co-authored-by: Charles Tapley Hoyt <cthoyt@gmail.com>
  • Loading branch information
mberr and cthoyt authored Feb 21, 2023
1 parent 12fcfe1 commit d9f5155
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 41 deletions.
6 changes: 3 additions & 3 deletions docs/source/tutorial/troubleshooting.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.. _troubleshooting:

################
Trobleshooting
################
#################
Troubleshooting
#################

***********************************************
Loading a Model from an Old Version of PyKEEN
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ lightning =
pytorch_lightning>=1.7.2
biomedicine =
bioregistry
pyobo
pyobo>=0.8.7
tests =
unittest-templates>=0.0.5
coverage
Expand Down
2 changes: 1 addition & 1 deletion src/pykeen/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
PYKEEN_HOME: Path = PYKEEN_MODULE.base
#: A subdirectory of the PyKEEN data folder for datasets, defaults to ``~/.data/pykeen/datasets``
PYKEEN_DATASETS: Path = PYKEEN_MODULE.join("datasets")
PYKEEN_DATASETS_MODULE: pystow.Module = PYKEEN_MODULE.submodule("datasets")
PYKEEN_DATASETS_MODULE: pystow.Module = PYKEEN_MODULE.module("datasets")
#: A subdirectory of the PyKEEN data folder for benchmarks, defaults to ``~/.data/pykeen/benchmarks``
PYKEEN_BENCHMARKS: Path = PYKEEN_MODULE.join("benchmarks")
#: A subdirectory of the PyKEEN data folder for experiments, defaults to ``~/.data/pykeen/experiments``
Expand Down
2 changes: 1 addition & 1 deletion src/pykeen/datasets/ea/openea.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"OpenEA",
]

OPEN_EA_MODULE = PYKEEN_DATASETS_MODULE.submodule("openea")
OPEN_EA_MODULE = PYKEEN_DATASETS_MODULE.module("openea")

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/pykeen/datasets/ea/wk3l.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
EN_DE: GraphPair = "en_de"
EN_FR: GraphPair = "en_fr"
GRAPH_PAIRS = (EN_DE, EN_FR)
WK3L_MODULE = PYKEEN_DATASETS_MODULE.submodule("wk3l")
WK3L_MODULE = PYKEEN_DATASETS_MODULE.module("wk3l")
EA_SIDES_R: Tuple[EASide, EASide] = (EA_SIDE_RIGHT, EA_SIDE_LEFT)


Expand Down
52 changes: 39 additions & 13 deletions src/pykeen/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,27 +164,46 @@
Until version 1.9, the model itself provided wrappers which would delegate to the corresponding method
in `pykeen.models.predict`
- `model.get_all_prediction_df`
- `model.get_prediction_df`
- `model.get_head_prediction_df`
- `model.get_relation_prediction_df`
- `model.get_tail_prediction_df`
* `model.get_all_prediction_df`
* `model.get_prediction_df`
* `model.get_head_prediction_df`
* `model.get_relation_prediction_df`
* `model.get_tail_prediction_df`
These methods were already deprecated and could be replaced by providing the model as an explicit parameter
to the stand-alone functions from the prediction module. Thus, we will focus on migrating the
stand-alone functions.
In the `pykeen.models.predict` module, the prediction methods were organized differently. There were
- `get_prediction_df`
- `get_head_prediction_df`
- `get_relation_prediction_df`
- `get_tail_prediction_df`
- `get_all_prediction_df`
- `predict_triples_df`
* `get_prediction_df`
* `get_head_prediction_df`
* `get_relation_prediction_df`
* `get_tail_prediction_df`
* `get_all_prediction_df`
* `predict_triples_df`
where `get_head_prediction_df`, `get_relation_prediction_df` and `get_tail_prediction_df` were deprecated in favour
of directly using `get_prediction_df`.
of directly using `get_prediction_df` with all but the prediction target being provided, e.g.,
>>> from pykeen.models import predict
>>> predict.get_tail_prediction_df(
... model=model,
... head_label="brazil",
... relation_label="intergovorgs",
... triples_factory=result.training,
... )
was deprecated in favour of
>>> from pykeen.models import predict
>>> predict.get_prediction_df(
... model=model,
... head_label="brazil",
... relation_label="intergovorgs",
... triples_factory=result.training,
... )
`get_prediction_df`
-------------------
Expand All @@ -202,7 +221,14 @@
can be replaced by
>>> from pykeen import predict
>>> predict.predict_target(model=model, head="brazil", relation="intergovorgs", triples_factory=result.training).df
>>> predict.predict_target(
... model=model,
... head="brazil",
... relation="intergovorgs",
... triples_factory=result.training,
... ).df
Notice the trailing `.df`.
`get_all_prediction_df`
-----------------------
Expand Down
7 changes: 6 additions & 1 deletion src/pykeen/triples/triples_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,12 @@ def to_path_binary(self, path: Union[str, pathlib.Path, TextIO]) -> pathlib.Path
self.relation_to_id,
),
):
pd.DataFrame(data=data.items(), columns=["label", "id"],).sort_values(by="id").set_index("id").to_csv(
pd.DataFrame(
data=data.items(),
columns=["label", "id"],
).sort_values(
by="id"
).set_index("id").to_csv(
path.joinpath(f"{name}.tsv.gz"),
sep="\t",
)
Expand Down
9 changes: 6 additions & 3 deletions src/pykeen/triples/triples_numeric_literals_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,12 @@ def clone_and_exchange_triples(
def to_path_binary(self, path: Union[str, pathlib.Path, TextIO]) -> pathlib.Path: # noqa: D102
path = super().to_path_binary(path=path)
# save literal-to-id mapping
pandas.DataFrame(data=self.literals_to_id.items(), columns=["label", "id"],).sort_values(by="id").set_index(
"id"
).to_csv(
pandas.DataFrame(
data=self.literals_to_id.items(),
columns=["label", "id"],
).sort_values(
by="id"
).set_index("id").to_csv(
path.joinpath(f"{self.file_name_literal_to_id}.tsv.gz"),
sep="\t",
)
Expand Down
36 changes: 19 additions & 17 deletions tests/test_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ def test_predict_top_k_consistency():
numpy.testing.assert_equal(dfs[0][column].values, dfs[1][column].values)


def _iter_predict_triples_inputs() -> Iterable[
Tuple[pykeen.models.Model, AnyTriples, Optional[CoreTriplesFactory], Optional[int]]
]:
def _iter_predict_triples_inputs() -> (
Iterable[Tuple[pykeen.models.Model, AnyTriples, Optional[CoreTriplesFactory], Optional[int]]]
):
"""Iterate over test inputs for predict_triples."""
dataset = Nations()
factory = dataset.training
Expand Down Expand Up @@ -236,15 +236,17 @@ def test_predict_triples(
_check_score_pack(pack=pack, model=model, num_triples=num_triples)


def _iter_get_input_batch_inputs() -> Iterable[
Tuple[
Optional[CoreTriplesFactory],
Union[None, int, str],
Union[None, int, str],
Union[None, int, str],
pykeen.typing.Target,
def _iter_get_input_batch_inputs() -> (
Iterable[
Tuple[
Optional[CoreTriplesFactory],
Union[None, int, str],
Union[None, int, str],
Union[None, int, str],
pykeen.typing.Target,
]
]
]:
):
"""Iterate over test inputs for _get_input_batch."""
factory = Nations().training
# ID-based, no factory
Expand Down Expand Up @@ -281,9 +283,9 @@ def test_get_input_batch(
assert batch.flatten().tolist() == list(batch_tuple)


def _iter_get_targets_inputs() -> Iterable[
Tuple[Union[None, torch.Tensor, Collection[Union[str, int]]], Optional[CoreTriplesFactory], bool]
]:
def _iter_get_targets_inputs() -> (
Iterable[Tuple[Union[None, torch.Tensor, Collection[Union[str, int]]], Optional[CoreTriplesFactory], bool]]
):
"""Iterate over test inputs for _get_targets."""
factory = Nations().training
for entity, id_to_label in ((True, factory.entity_id_to_label), (False, factory.relation_id_to_label)):
Expand Down Expand Up @@ -333,9 +335,9 @@ def test_get_targets(
assert (ids_tensor == exp_tensor).all()


def _iter_predict_target_inputs() -> Iterable[
Tuple[pykeen.models.Model, int, int, int, Optional[CoreTriplesFactory], Optional[Sequence[int]]]
]:
def _iter_predict_target_inputs() -> (
Iterable[Tuple[pykeen.models.Model, int, int, int, Optional[CoreTriplesFactory], Optional[Sequence[int]]]]
):
# comment: we only use id-based input, since the normalization has already been tested
# create model
factory = Nations().training
Expand Down

0 comments on commit d9f5155

Please sign in to comment.