📚🛗 Update Prediction Migration Guide (pykeen#1233)

- [x] fix RST lists in `pykeen.predict` module docstring
- [x] extend the migration documentation to include an example for a method which was already deprecated in v1.9

Also updates a few minor things:

- [x] fix typo in tutorial title
- [x] update pystow's deprecated `.submodule` (removed in v0.5) to `.module`
- [x] update code formatting to be compliant with up-to-date `black`

---------

Co-authored-by: Charles Tapley Hoyt <cthoyt@gmail.com>
nicolafan · Feb 21, 2023 · d9f5155 · d9f5155
1 parent 12fcfe1
commit d9f5155
Show file tree

Hide file tree

Showing 9 changed files with 77 additions and 41 deletions.
diff --git a/docs/source/tutorial/troubleshooting.rst b/docs/source/tutorial/troubleshooting.rst
@@ -1,8 +1,8 @@
 .. _troubleshooting:
 
-################
- Trobleshooting
-################
+#################
+ Troubleshooting
+#################
 
 ***********************************************
  Loading a Model from an Old Version of PyKEEN

diff --git a/setup.cfg b/setup.cfg
@@ -116,7 +116,7 @@ lightning =
     pytorch_lightning>=1.7.2
 biomedicine =
     bioregistry
-    pyobo
+    pyobo>=0.8.7
 tests =
     unittest-templates>=0.0.5
     coverage

diff --git a/src/pykeen/constants.py b/src/pykeen/constants.py
@@ -38,7 +38,7 @@
 PYKEEN_HOME: Path = PYKEEN_MODULE.base
 #: A subdirectory of the PyKEEN data folder for datasets, defaults to ``~/.data/pykeen/datasets``
 PYKEEN_DATASETS: Path = PYKEEN_MODULE.join("datasets")
-PYKEEN_DATASETS_MODULE: pystow.Module = PYKEEN_MODULE.submodule("datasets")
+PYKEEN_DATASETS_MODULE: pystow.Module = PYKEEN_MODULE.module("datasets")
 #: A subdirectory of the PyKEEN data folder for benchmarks, defaults to ``~/.data/pykeen/benchmarks``
 PYKEEN_BENCHMARKS: Path = PYKEEN_MODULE.join("benchmarks")
 #: A subdirectory of the PyKEEN data folder for experiments, defaults to ``~/.data/pykeen/experiments``

diff --git a/src/pykeen/datasets/ea/openea.py b/src/pykeen/datasets/ea/openea.py
@@ -25,7 +25,7 @@
     "OpenEA",
 ]
 
-OPEN_EA_MODULE = PYKEEN_DATASETS_MODULE.submodule("openea")
+OPEN_EA_MODULE = PYKEEN_DATASETS_MODULE.module("openea")
 
 logger = logging.getLogger(__name__)
 

diff --git a/src/pykeen/datasets/ea/wk3l.py b/src/pykeen/datasets/ea/wk3l.py
@@ -36,7 +36,7 @@
 EN_DE: GraphPair = "en_de"
 EN_FR: GraphPair = "en_fr"
 GRAPH_PAIRS = (EN_DE, EN_FR)
-WK3L_MODULE = PYKEEN_DATASETS_MODULE.submodule("wk3l")
+WK3L_MODULE = PYKEEN_DATASETS_MODULE.module("wk3l")
 EA_SIDES_R: Tuple[EASide, EASide] = (EA_SIDE_RIGHT, EA_SIDE_LEFT)
 
 

diff --git a/src/pykeen/predict.py b/src/pykeen/predict.py
@@ -164,27 +164,46 @@
 Until version 1.9, the model itself provided wrappers which would delegate to the corresponding method
 in `pykeen.models.predict`
 
-- `model.get_all_prediction_df`
-- `model.get_prediction_df`
-- `model.get_head_prediction_df`
-- `model.get_relation_prediction_df`
-- `model.get_tail_prediction_df`
+* `model.get_all_prediction_df`
+* `model.get_prediction_df`
+* `model.get_head_prediction_df`
+* `model.get_relation_prediction_df`
+* `model.get_tail_prediction_df`
 
 These methods were already deprecated and could be replaced by providing the model as explicit parameter
 to the stand-alone functions from the prediction module. Thus, we will focus on migrating the
 stand-alone functions.
 
 In the `pykeen.models.predict` module, the prediction methods were organized differently. There were
 
-- `get_prediction_df`
-- `get_head_prediction_df`
-- `get_relation_prediction_df`
-- `get_tail_prediction_df`
-- `get_all_prediction_df`
-- `predict_triples_df`
+* `get_prediction_df`
+* `get_head_prediction_df`
+* `get_relation_prediction_df`
+* `get_tail_prediction_df`
+* `get_all_prediction_df`
+* `predict_triples_df`
 
 where `get_head_prediction_df`, `get_relation_prediction_df` and `get_tail_prediction_df` were deprecated in favour
-of directly using `get_prediction_df`.
+of directly using `get_prediction_df` with all but the prediction target being provided, e.g.,
+
+>>> from pykeen.models import predict
+>>> predict.get_tail_prediction_df(
+...     model=model,
+...     head_label="brazil",
+...     relation_label="intergovorgs",
+...     triples_factory=result.training,
+... )
+
+was deprecated in favour of
+
+>>> from pykeen.models import predict
+>>> predict.get_prediction_df(
+...     model=model,
+...     head_label="brazil",
+...     relation_label="intergovorgs",
+...     triples_factory=result.training,
+... )
+
 
 `get_prediction_df`
 -------------------
@@ -202,7 +221,14 @@
 can be replaced by
 
 >>> from pykeen import predict
->>> predict.predict_target(model=model, head="brazil", relation="intergovorgs", triples_factory=result.training).df
+>>> predict.predict_target(
+...     model=model,
+...     head="brazil",
+...     relation="intergovorgs",
+...     triples_factory=result.training,
+... ).df
+
+Notice the trailing `.df`.
 
 `get_all_prediction_df`
 -----------------------

diff --git a/src/pykeen/triples/triples_factory.py b/src/pykeen/triples/triples_factory.py
@@ -1032,7 +1032,12 @@ def to_path_binary(self, path: Union[str, pathlib.Path, TextIO]) -> pathlib.Path
                 self.relation_to_id,
             ),
         ):
-            pd.DataFrame(data=data.items(), columns=["label", "id"],).sort_values(by="id").set_index("id").to_csv(
+            pd.DataFrame(
+                data=data.items(),
+                columns=["label", "id"],
+            ).sort_values(
+                by="id"
+            ).set_index("id").to_csv(
                 path.joinpath(f"{name}.tsv.gz"),
                 sep="\t",
             )

diff --git a/src/pykeen/triples/triples_numeric_literals_factory.py b/src/pykeen/triples/triples_numeric_literals_factory.py
@@ -149,9 +149,12 @@ def clone_and_exchange_triples(
     def to_path_binary(self, path: Union[str, pathlib.Path, TextIO]) -> pathlib.Path:  # noqa: D102
         path = super().to_path_binary(path=path)
         # save literal-to-id mapping
-        pandas.DataFrame(data=self.literals_to_id.items(), columns=["label", "id"],).sort_values(by="id").set_index(
-            "id"
-        ).to_csv(
+        pandas.DataFrame(
+            data=self.literals_to_id.items(),
+            columns=["label", "id"],
+        ).sort_values(
+            by="id"
+        ).set_index("id").to_csv(
             path.joinpath(f"{self.file_name_literal_to_id}.tsv.gz"),
             sep="\t",
         )

diff --git a/tests/test_prediction.py b/tests/test_prediction.py
@@ -191,9 +191,9 @@ def test_predict_top_k_consistency():
         numpy.testing.assert_equal(dfs[0][column].values, dfs[1][column].values)
 
 
-def _iter_predict_triples_inputs() -> Iterable[
-    Tuple[pykeen.models.Model, AnyTriples, Optional[CoreTriplesFactory], Optional[int]]
-]:
+def _iter_predict_triples_inputs() -> (
+    Iterable[Tuple[pykeen.models.Model, AnyTriples, Optional[CoreTriplesFactory], Optional[int]]]
+):
     """Iterate over test inputs for predict_triples."""
     dataset = Nations()
     factory = dataset.training
@@ -236,15 +236,17 @@ def test_predict_triples(
     _check_score_pack(pack=pack, model=model, num_triples=num_triples)
 
 
-def _iter_get_input_batch_inputs() -> Iterable[
-    Tuple[
-        Optional[CoreTriplesFactory],
-        Union[None, int, str],
-        Union[None, int, str],
-        Union[None, int, str],
-        pykeen.typing.Target,
+def _iter_get_input_batch_inputs() -> (
+    Iterable[
+        Tuple[
+            Optional[CoreTriplesFactory],
+            Union[None, int, str],
+            Union[None, int, str],
+            Union[None, int, str],
+            pykeen.typing.Target,
+        ]
     ]
-]:
+):
     """Iterate over test inputs for _get_input_batch."""
     factory = Nations().training
     # ID-based, no factory
@@ -281,9 +283,9 @@ def test_get_input_batch(
     assert batch.flatten().tolist() == list(batch_tuple)
 
 
-def _iter_get_targets_inputs() -> Iterable[
-    Tuple[Union[None, torch.Tensor, Collection[Union[str, int]]], Optional[CoreTriplesFactory], bool]
-]:
+def _iter_get_targets_inputs() -> (
+    Iterable[Tuple[Union[None, torch.Tensor, Collection[Union[str, int]]], Optional[CoreTriplesFactory], bool]]
+):
     """Iterate over test inputs for _get_targets."""
     factory = Nations().training
     for entity, id_to_label in ((True, factory.entity_id_to_label), (False, factory.relation_id_to_label)):
@@ -333,9 +335,9 @@ def test_get_targets(
         assert (ids_tensor == exp_tensor).all()
 
 
-def _iter_predict_target_inputs() -> Iterable[
-    Tuple[pykeen.models.Model, int, int, int, Optional[CoreTriplesFactory], Optional[Sequence[int]]]
-]:
+def _iter_predict_target_inputs() -> (
+    Iterable[Tuple[pykeen.models.Model, int, int, int, Optional[CoreTriplesFactory], Optional[Sequence[int]]]]
+):
     # comment: we only use id-based input, since the normalization has already been tested
     # create model
     factory = Nations().training