diff --git a/docs/source/byo/data.rst b/docs/source/byo/data.rst index a01e43c34b..e13772ff9f 100644 --- a/docs/source/byo/data.rst +++ b/docs/source/byo/data.rst @@ -47,14 +47,14 @@ The remainder of the examples will be for :func:`pykeen.pipeline.pipeline`, but for :func:`pykeen.hpo.hpo_pipeline`. If you want to add dataset-wide arguments, you can use the ``dataset_kwargs`` argument -to the :class:`pykeen.pipeline.pipeline` to enable options like ``create_inverse_triples=True``. +to the :class:`pykeen.pipeline.pipeline` to enable options like ``use_inverse_relations=True``. >>> from pykeen.pipeline import pipeline >>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH >>> result = pipeline( ... training=NATIONS_TRAIN_PATH, ... testing=NATIONS_TEST_PATH, -... dataset_kwargs={'create_inverse_triples': True}, +... dataset_kwargs={'use_inverse_relations': True}, ... model='TransE', ... epochs=5, # short epochs for testing - you should go higher ... ) @@ -89,7 +89,7 @@ TSV files, you can use the :class:`pykeen.triples.TriplesFactory` interface. the wrong identifiers in the training set during evaluation, and we'd get nonsense results. The ``dataset_kwargs`` argument is ignored when passing your own :class:`pykeen.triples.TriplesFactory`, so be -sure to include the ``create_inverse_triples=True`` in the instantiation of those classes if that's your +sure to include the ``use_inverse_relations=True`` in the instantiation of those classes if that's your desired behavior as in: >>> from pykeen.triples import TriplesFactory @@ -97,13 +97,13 @@ desired behavior as in: >>> from pykeen.datasets.nations import NATIONS_TRAIN_PATH, NATIONS_TEST_PATH >>> training = TriplesFactory.from_path( ... NATIONS_TRAIN_PATH, -... create_inverse_triples=True, +... use_inverse_relations=True, ... ) >>> testing = TriplesFactory.from_path( ... NATIONS_TEST_PATH, ... entity_to_id=training.entity_to_id, ... relation_to_id=training.relation_to_id, -... create_inverse_triples=True, +... use_inverse_relations=True, ... ) >>> result = pipeline( ... training=training, diff --git a/docs/source/tutorial/inductive_lp.rst b/docs/source/tutorial/inductive_lp.rst index be88880fda..7b67a49e23 100644 --- a/docs/source/tutorial/inductive_lp.rst +++ b/docs/source/tutorial/inductive_lp.rst @@ -91,7 +91,7 @@ Let's create a basic `InductiveNodePiece` using one of the `InductiveFB15k237` d from pykeen.models.inductive import InductiveNodePiece from pykeen.losses import NSSALoss - dataset = InductiveFB15k237(version="v1", create_inverse_triples=True) + dataset = InductiveFB15k237(version="v1", use_inverse_relations=True) model = InductiveNodePiece( triples_factory=dataset.transductive_training, # training factory, used to tokenize training nodes @@ -110,7 +110,7 @@ Creating a message-passing version of NodePiece is pretty much the same: from pykeen.models.inductive import InductiveNodePieceGNN from pykeen.losses import NSSALoss - dataset = InductiveFB15k237(version="v1", create_inverse_triples=True) + dataset = InductiveFB15k237(version="v1", use_inverse_relations=True) model = InductiveNodePieceGNN( triples_factory=dataset.transductive_training, # training factory, will be also used for a GNN @@ -166,7 +166,7 @@ Let's create a training loop and validation / test evaluators: from pykeen.evaluation.rank_based_evaluator import SampledRankBasedEvaluator from pykeen.losses import NSSALoss - dataset = InductiveFB15k237(version="v1", create_inverse_triples=True) + dataset = InductiveFB15k237(version="v1", use_inverse_relations=True) model = ... # model init here, one of InductiveNodePiece optimizer = ... # some optimizer @@ -207,7 +207,7 @@ in the sLCWA mode with 32 negative samples per positive, with NSSALoss, and Samp from torch.optim import Adam - dataset = InductiveFB15k237(version="v1", create_inverse_triples=True) + dataset = InductiveFB15k237(version="v1", use_inverse_relations=True) model = InductiveNodePieceGNN( triples_factory=dataset.transductive_training, # training factory, will be also used for a GNN diff --git a/docs/source/tutorial/node_piece.rst b/docs/source/tutorial/node_piece.rst index 7e6b8e9447..5d7284c5c7 100644 --- a/docs/source/tutorial/node_piece.rst +++ b/docs/source/tutorial/node_piece.rst @@ -18,7 +18,7 @@ throughout the following examples. from pykeen.datasets import FB15k237 # inverses are necessary for the current version of NodePiece - dataset = FB15k237(create_inverse_triples=True) + dataset = FB15k237(use_inverse_relations=True) In the simplest usage of :class:`pykeen.models.NodePiece`, we'll only use relations for tokenization. We can do this by with the following @@ -286,7 +286,7 @@ Let's pack the last NodePiece model into the pipeline: result = pipeline( dataset="fb15k237", dataset_kwargs=dict( - create_inverse_triples=True, + use_inverse_relations=True, ), model=NodePiece, model_kwargs=dict( @@ -498,7 +498,7 @@ pipeline: result = pipeline( dataset="fb15k237", dataset_kwargs=dict( - create_inverse_triples=True, + use_inverse_relations=True, ), model=NodePiece, model_kwargs=dict( @@ -590,7 +590,7 @@ Let's use the new tokenizer for the Wikidata5M graph of 5M nodes and 20M edges. from pykeen.datasets import Wikidata5M - dataset = Wikidata5M(create_inverse_triples=True) + dataset = Wikidata5M(use_inverse_relations=True) model = NodePiece( triples_factory=dataset.training, diff --git a/docs/source/tutorial/running_ablation.rst b/docs/source/tutorial/running_ablation.rst index e3df41fe4e..f73dbe5ef0 100644 --- a/docs/source/tutorial/running_ablation.rst +++ b/docs/source/tutorial/running_ablation.rst @@ -68,7 +68,7 @@ as ``title`` are special and used by PyKEEN and :mod:`optuna`. ... ) As mentioned above, we also want to measure the effect of explicitly modeling inverse relations on the model's -performance. Therefore, we extend the ablation study by including the ``create_inverse_triples`` argument: +performance. Therefore, we extend the ablation study by including the ``use_inverse_relations`` argument: .. code-block:: python @@ -82,7 +82,7 @@ performance. Therefore, we extend the ablation study by including the ``create_i ... training_loops=["LCWA"], ... optimizers=["Adam"], ... # Add inverse triples with - ... create_inverse_triples=[True, False], + ... use_inverse_relations=[True, False], ... # Fast testing configuration, make bigger in prod ... epochs=1, ... n_trials=1, @@ -91,10 +91,10 @@ performance. Therefore, we extend the ablation study by including the ``create_i .. note:: Unlike ``models``, ``datasets``, ``losses``, ``training_loops``, and ``optimizers``, - ``create_inverse_triples`` has a default value, which is ``False``. + ``use_inverse_relations`` has a default value, which is ``False``. If there is only one value for either the ``models``, ``datasets``, ``losses``, ``training_loops``, ``optimizers``, -or ``create_inverse_triples`` argument, it can be given as a single value instead of the list. +or ``use_inverse_relations`` argument, it can be given as a single value instead of the list. .. code-block:: python @@ -107,7 +107,7 @@ or ``create_inverse_triples`` argument, it can be given as a single value instea ... losses=["BCEAfterSigmoidLoss", "MarginRankingLoss"], ... training_loops="LCWA", ... optimizers="Adam", - ... create_inverse_triples=[True, False], + ... use_inverse_relations=[True, False], ... # Fast testing configuration, make bigger in prod ... epochs=1, ... n_trials=1, @@ -200,7 +200,7 @@ the best model of each ablation-experiment using the argument ``best_replicates` ... losses=["BCEAfterSigmoidLoss", "MarginRankingLoss"], ... training_loops=["LCWA"], ... optimizers=["Adam"], - ... create_inverse_triples=[True, False], + ... use_inverse_relations=[True, False], ... stopper="early", ... stopper_kwargs={ ... "frequency": 5, @@ -384,7 +384,7 @@ Now that we defined our own hyper-parameter values/ranges, let's have a look at >>> losses = ["BCEAfterSigmoidLoss"] >>> training_loops = ["lcwa"] >>> optimizers = ["adam"] - >>> create_inverse_triples= [True, False] + >>> use_inverse_relations= [True, False] >>> stopper = "early" >>> stopper_kwargs = { ... "frequency": 5, @@ -513,7 +513,7 @@ defined within our program would look as follows: "losses": ["BCEAfterSigmoidLoss", "CrossEntropyLoss"] "training_loops": ["lcwa"], "optimizers": ["adam"], - "create_inverse_triples": [true,false], + "use_inverse_relations": [true,false], "stopper": "early" "stopper_kwargs": { "frequency": 5, diff --git a/notebooks/hello_world/hello_world.ipynb b/notebooks/hello_world/hello_world.ipynb index fbf62b6bb1..3d3e890293 100644 --- a/notebooks/hello_world/hello_world.ipynb +++ b/notebooks/hello_world/hello_world.ipynb @@ -1,2558 +1,2558 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hello, World!\n", - "\n", - "This notebook is about your first steps with knowledge graph embedding models in PyKEEN.\n", - "\n", - "You'll get to do the following:\n", - "\n", - "1. train a model\n", - "2. evaluate how good it learned\n", - "3. turn it around and start making predictions." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pip in /Users/cthoyt/.virtualenvs/pykeen/lib/python3.10/site-packages (22.2.2)\r\n" - ] - } - ], - "source": [ - "# Install packages if they're not already found\n", - "! pip install --upgrade pip\n", - "! python -c \"import pykeen\" || pip install git+https://github.com/pykeen/pykeen.git\n", - "! python -c \"import wordcloud\" || pip install wordcloud" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import torch\n", - "\n", - "import pykeen\n", - "from pykeen.datasets import Nations\n", - "from pykeen.pipeline import pipeline\n", - "\n", - "%matplotlib inline\n", - "%config InlineBackend.figure_format = 'svg'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
Key Value
OS posix
Platform Darwin
Release 21.5.0
Time Fri Aug 19 15:10:28 2022
Python 3.10.4
PyKEEN 1.9.1-dev
PyKEEN Hash e7851f61
PyKEEN Branch fix-word-cloud
PyTorch 1.11.0
CUDA Available?false
CUDA Version N/A
cuDNN Version N/A
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pykeen.env()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train a Model\n", - "\n", - "More tutorials on training your first model can be found [here](https://pykeen.readthedocs.io/en/latest/first_steps.html).\n", - "\n", - "You can try switching out the model, add a `loss`, a `regularizer`, or switch the training assumption from `sLCWA` to `LCWA`. Each also has their own hyper-parameters, though PyKEEN tries to have reasonable defaults for you. The most useful one to change is the `num_epochs` in the `training_kwargs`, which is already below." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "application/json": { - "ascii": false, - "bar_format": null, - "colour": null, - "elapsed": 0.027060985565185547, - "initial": 0, - "n": 0, - "ncols": null, - "nrows": 25, - "postfix": null, - "prefix": "Training epochs on cpu", - "rate": null, - "total": 200, - "unit": "epoch", - "unit_divisor": 1000, - "unit_scale": false - }, - "application/vnd.jupyter.widget-view+json": { - "model_id": "280875a095554c668513e99e147e1396", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Training epochs on cpu: 0%| | 0/200 [00:00\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hello, World!\n", + "\n", + "This notebook is about your first steps with knowledge graph embedding models in PyKEEN.\n", + "\n", + "You'll get to do the following:\n", + "\n", + "1. train a model\n", + "2. evaluate how good it learned\n", + "3. turn it around and start making predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pip in /Users/cthoyt/.virtualenvs/pykeen/lib/python3.10/site-packages (22.2.2)\r\n" + ] + } + ], + "source": [ + "# Install packages if they're not already found\n", + "! pip install --upgrade pip\n", + "! python -c \"import pykeen\" || pip install git+https://github.com/pykeen/pykeen.git\n", + "! python -c \"import wordcloud\" || pip install wordcloud" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "\n", + "import pykeen\n", + "from pykeen.datasets import Nations\n", + "from pykeen.pipeline import pipeline\n", + "\n", + "%matplotlib inline\n", + "%config InlineBackend.figure_format = 'svg'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Key Value
OS posix
Platform Darwin
Release 21.5.0
Time Fri Aug 19 15:10:28 2022
Python 3.10.4
PyKEEN 1.9.1-dev
PyKEEN Hash e7851f61
PyKEEN Branch fix-word-cloud
PyTorch 1.11.0
CUDA Available?false
CUDA Version N/A
cuDNN Version N/A
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pykeen.env()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train a Model\n", + "\n", + "More tutorials on training your first model can be found [here](https://pykeen.readthedocs.io/en/latest/first_steps.html).\n", + "\n", + "You can try switching out the model, add a `loss`, a `regularizer`, or switch the training assumption from `sLCWA` to `LCWA`. Each also has their own hyper-parameters, though PyKEEN tries to have reasonable defaults for you. The most useful one to change is the `num_epochs` in the `training_kwargs`, which is already below." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.027060985565185547, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": 25, + "postfix": null, + "prefix": "Training epochs on cpu", + "rate": null, + "total": 200, + "unit": "epoch", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "280875a095554c668513e99e147e1396", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training epochs on cpu: 0%| | 0/200 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2022-08-19T15:10:47.381847\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.5.2, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result.plot_losses()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Look at the result. These are pretty tricky to interpret, so remember:\n", + "\n", + "- adjusted mean rank is between [0, 2]. Closer to 0 is better!\n", + "- mean rank is a positive integer, with a bound based on the number of entities. Closer to 0 is better!\n", + "- hits@k is reported between [0, 1] and interpreted as a percentage. Closer to 1 is better!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SideTypeMetricValue
0headoptimisticz_arithmetic_mean_rank15.143086
1tailoptimisticz_arithmetic_mean_rank16.974819
2bothoptimisticz_arithmetic_mean_rank22.729568
3headrealisticz_arithmetic_mean_rank15.143086
4tailrealisticz_arithmetic_mean_rank16.974819
...............
220tailrealisticadjusted_hits_at_k1.000000
221bothrealisticadjusted_hits_at_k0.953127
222headpessimisticadjusted_hits_at_k0.905599
223tailpessimisticadjusted_hits_at_k1.000000
224bothpessimisticadjusted_hits_at_k0.953127
\n", + "

225 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " Side Type Metric Value\n", + "0 head optimistic z_arithmetic_mean_rank 15.143086\n", + "1 tail optimistic z_arithmetic_mean_rank 16.974819\n", + "2 both optimistic z_arithmetic_mean_rank 22.729568\n", + "3 head realistic z_arithmetic_mean_rank 15.143086\n", + "4 tail realistic z_arithmetic_mean_rank 16.974819\n", + ".. ... ... ... ...\n", + "220 tail realistic adjusted_hits_at_k 1.000000\n", + "221 both realistic adjusted_hits_at_k 0.953127\n", + "222 head pessimistic adjusted_hits_at_k 0.905599\n", + "223 tail pessimistic adjusted_hits_at_k 1.000000\n", + "224 both pessimistic adjusted_hits_at_k 0.953127\n", + "\n", + "[225 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result.metric_results.to_df()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Turn it around: make predictions\n", + "\n", + "It's very difficult to interpret KGEMs statistically, so it's best to sort order the predictions by their scores. All interaction functions in PyKEEN have been implemented such that the higher the score (or less negative the score), the more likely a triple is to be true.\n", + "\n", + "Before making any predictions, we're goign to show some word clouds of the entities and relations in the Nations dataset, with size corresponding to frequency of appearance in triples." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "usa\n", + "uk\n", + "ussr\n", + "netherlands\n", + "india\n", + "poland\n", + "egypt\n", + "brazil\n", + "israel\n", + "china\n", + "cuba\n", + "indonesia\n", + "jordan\n", + "burma\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tf.entity_word_cloud()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "embassy\n", + "commonbloc1\n", + "relngo\n", + "relintergovorgs\n", + "intergovorgs3\n", + "timesinceally\n", + "ngoorgs3\n", + "intergovorgs\n", + "reldiplomacy\n", + "independence\n", + "conferences\n", + "ngo\n", + "weightedunvote\n", + "blockpositionindex\n", + "commonbloc2\n", + "treaties\n", + "reltreaties\n", + "unweightedunvote\n", + "relexports\n", + "negativebehavior\n", + "exports3\n", + "commonbloc0\n", + "booktranslations\n", + "officialvisits\n", + "relbooktranslations\n", + "tourism\n", + "accusation\n", + "timesincewar\n", + "reltourism\n", + "pprotests\n", + "militaryalliance\n", + "nonviolentbehavior\n", + "negativecomm\n", + "tourism3\n", + "students\n", + "exportbooks\n", + "relexportbooks\n", + "releconomicaid\n", + "relstudents\n", + "economicaid\n", + "eemigrants\n", + "boycottembargo\n", + "emigrants3\n", + "duration\n", + "militaryactions\n", + "violentactions\n", + "relemigrants\n", + "dependent\n", + "expeldiplomats\n", + "unoffialacts\n", + "warning\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tf.relation_word_cloud()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mapped_triples = Nations().testing.mapped_triples.to(model.device)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tail_idtail_labelscorein_trainingin_testing
55india-2.114373TrueFalse
1212usa-2.119678TrueFalse
99netherlands-2.138442TrueFalse
1111uk-2.192534TrueFalse
44egypt-2.437390TrueFalse
66indonesia-2.617149TrueFalse
1010poland-2.621141FalseTrue
77israel-2.652526TrueFalse
33cuba-2.981478FalseTrue
1313ussr-3.058180FalseFalse
00brazil-3.211353FalseFalse
22china-3.242816FalseFalse
11burma-3.261366FalseFalse
88jordan-3.528769FalseFalse
\n", + "
" + ], + "text/plain": [ + " tail_id tail_label score in_training in_testing\n", + "5 5 india -2.114373 True False\n", + "12 12 usa -2.119678 True False\n", + "9 9 netherlands -2.138442 True False\n", + "11 11 uk -2.192534 True False\n", + "4 4 egypt -2.437390 True False\n", + "6 6 indonesia -2.617149 True False\n", + "10 10 poland -2.621141 False True\n", + "7 7 israel -2.652526 True False\n", + "3 3 cuba -2.981478 False True\n", + "13 13 ussr -3.058180 False False\n", + "0 0 brazil -3.211353 False False\n", + "2 2 china -3.242816 False False\n", + "1 1 burma -3.261366 False False\n", + "8 8 jordan -3.528769 False False" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Who do we predict brazil participates in inter-governmental organizations with?\n", + "model.get_tail_prediction_df(\"brazil\", \"intergovorgs\", triples_factory=tf, testing=testing_mapped_triples)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:pykeen.models.predict:Since remove_known is enabled, will not add novelty column\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tail_idtail_labelscore
1313ussr-3.058180
00brazil-3.211353
22china-3.242816
11burma-3.261366
88jordan-3.528769
\n", + "
" + ], + "text/plain": [ + " tail_id tail_label score\n", + "13 13 ussr -3.058180\n", + "0 0 brazil -3.211353\n", + "2 2 china -3.242816\n", + "1 1 burma -3.261366\n", + "8 8 jordan -3.528769" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Automatically filter out non-novel predictions (e.g. in training or in testing is True)\n", + "model.get_tail_prediction_df(\n", + " \"brazil\", \"intergovorgs\", triples_factory=tf, testing=testing_mapped_triples, remove_known=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
head_idhead_labelscorein_trainingin_testing
1010poland-2.129169TrueFalse
33cuba-2.224976TrueFalse
1313ussr-2.242457TrueFalse
1212usa-2.284824TrueFalse
55india-2.410077TrueFalse
1111uk-2.813856FalseFalse
66indonesia-2.837616FalseFalse
22china-2.893122FalseFalse
44egypt-2.944993FalseFalse
99netherlands-2.984741FalseFalse
77israel-3.278340FalseFalse
11burma-3.328085FalseFalse
88jordan-3.355240FalseFalse
00brazil-3.428868FalseFalse
\n", + "
" + ], + "text/plain": [ + " head_id head_label score in_training in_testing\n", + "10 10 poland -2.129169 True False\n", + "3 3 cuba -2.224976 True False\n", + "13 13 ussr -2.242457 True False\n", + "12 12 usa -2.284824 True False\n", + "5 5 india -2.410077 True False\n", + "11 11 uk -2.813856 False False\n", + "6 6 indonesia -2.837616 False False\n", + "2 2 china -2.893122 False False\n", + "4 4 egypt -2.944993 False False\n", + "9 9 netherlands -2.984741 False False\n", + "7 7 israel -3.278340 False False\n", + "1 1 burma -3.328085 False False\n", + "8 8 jordan -3.355240 False False\n", + "0 0 brazil -3.428868 False False" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Who do we predict to have a conference with brazil?\n", + "model.get_head_prediction_df(\"conferences\", \"brazil\", triples_factory=tf, testing=testing_mapped_triples)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:pykeen.models.predict:predict is an expensive operation, involving 10,780 score evaluations.\n", + "WARNING:pykeen.models.predict:Not providing k to `predict` entails huge memory requirements for reasonably-sized knowledge graphs.\n" + ] + }, + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.01923513412475586, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": 25, + "postfix": null, + "prefix": "scoring", + "rate": null, + "total": 770, + "unit": "batch", + "unit_divisor": 1000, + "unit_scale": true + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "scoring: 0%| | 0.00/770 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
head_idhead_labelrelation_idrelation_labeltail_idtail_labelscorein_trainingin_testing
05india2attackembassy6indonesia-1.138494TrueFalse
111uk24militaryalliance9netherlands-1.231564TrueFalse
27israel22lostterritory8jordan-1.267689TrueFalse
310poland22lostterritory13ussr-1.276796TrueFalse
413ussr6commonbloc012usa-1.294233TrueFalse
..............................
1077512usa13eemigrants1burma-4.371924FalseFalse
107765india19independence5india-4.388505FalseFalse
107772china27ngo2china-4.428998FalseFalse
107780brazil12economicaid13ussr-4.489155FalseFalse
107791burma19independence1burma-4.633777FalseFalse
\n", + "

10780 rows × 9 columns

\n", + "" + ], + "text/plain": [ + " head_id head_label relation_id relation_label tail_id \\\n", + "0 5 india 2 attackembassy 6 \n", + "1 11 uk 24 militaryalliance 9 \n", + "2 7 israel 22 lostterritory 8 \n", + "3 10 poland 22 lostterritory 13 \n", + "4 13 ussr 6 commonbloc0 12 \n", + "... ... ... ... ... ... \n", + "10775 12 usa 13 eemigrants 1 \n", + "10776 5 india 19 independence 5 \n", + "10777 2 china 27 ngo 2 \n", + "10778 0 brazil 12 economicaid 13 \n", + "10779 1 burma 19 independence 1 \n", + "\n", + " tail_label score in_training in_testing \n", + "0 indonesia -1.138494 True False \n", + "1 netherlands -1.231564 True False \n", + "2 jordan -1.267689 True False \n", + "3 ussr -1.276796 True False \n", + "4 usa -1.294233 True False \n", + "... ... ... ... ... \n", + "10775 burma -4.371924 False False \n", + "10776 india -4.388505 False False \n", + "10777 china -4.428998 False False \n", + "10778 ussr -4.489155 False False \n", + "10779 burma -4.633777 False False \n", + "\n", + "[10780 rows x 9 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Score all triples\n", + "model.get_all_prediction_df(triples_factory=tf, testing=testing_mapped_triples)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pykeen.evaluation.evaluator:Currently automatic memory optimization only supports GPUs, but you're using a CPU. Therefore, the batch_size will be set to the default value.\n", - "INFO:pykeen.evaluation.evaluator:No evaluation batch_size provided. Setting batch_size to '32'.\n" - ] - }, - { - "data": { - "application/json": { - "ascii": false, - "bar_format": null, - "colour": null, - "elapsed": 0.016260147094726562, - "initial": 0, - "n": 0, - "ncols": null, - "nrows": 25, - "postfix": null, - "prefix": "Evaluating on cpu", - "rate": null, - "total": 201, - "unit": "triple", - "unit_divisor": 1000, - "unit_scale": true - }, - "application/vnd.jupyter.widget-view+json": { - "model_id": "f7fbf42d62d841f788fdf8475493be33", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Evaluating on cpu: 0%| | 0.00/201 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2022-08-19T15:10:47.381847\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.5.2, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "result.plot_losses()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Look at the result. These are pretty tricky to interpret, so remember:\n", - "\n", - "- adjusted mean rank is between [0, 2]. Closer to 0 is better!\n", - "- mean rank is a positive integer, with a bound based on the number of entities. Closer to 0 is better!\n", - "- hits@k is reported between [0, 1] and interpreted as a percentage. Closer to 1 is better!" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SideTypeMetricValue
0headoptimisticz_arithmetic_mean_rank15.143086
1tailoptimisticz_arithmetic_mean_rank16.974819
2bothoptimisticz_arithmetic_mean_rank22.729568
3headrealisticz_arithmetic_mean_rank15.143086
4tailrealisticz_arithmetic_mean_rank16.974819
...............
220tailrealisticadjusted_hits_at_k1.000000
221bothrealisticadjusted_hits_at_k0.953127
222headpessimisticadjusted_hits_at_k0.905599
223tailpessimisticadjusted_hits_at_k1.000000
224bothpessimisticadjusted_hits_at_k0.953127
\n", - "

225 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " Side Type Metric Value\n", - "0 head optimistic z_arithmetic_mean_rank 15.143086\n", - "1 tail optimistic z_arithmetic_mean_rank 16.974819\n", - "2 both optimistic z_arithmetic_mean_rank 22.729568\n", - "3 head realistic z_arithmetic_mean_rank 15.143086\n", - "4 tail realistic z_arithmetic_mean_rank 16.974819\n", - ".. ... ... ... ...\n", - "220 tail realistic adjusted_hits_at_k 1.000000\n", - "221 both realistic adjusted_hits_at_k 0.953127\n", - "222 head pessimistic adjusted_hits_at_k 0.905599\n", - "223 tail pessimistic adjusted_hits_at_k 1.000000\n", - "224 both pessimistic adjusted_hits_at_k 0.953127\n", - "\n", - "[225 rows x 4 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result.metric_results.to_df()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Turn it around: make predictions\n", - "\n", - "It's very difficult to interpret KGEMs statistically, so it's best to sort order the predictions by their scores. All interaction functions in PyKEEN have been implemented such that the higher the score (or less negative the score), the more likely a triple is to be true.\n", - "\n", - "Before making any predictions, we're goign to show some word clouds of the entities and relations in the Nations dataset, with size corresponding to frequency of appearance in triples." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "usa\n", - "uk\n", - "ussr\n", - "netherlands\n", - "india\n", - "poland\n", - "egypt\n", - "brazil\n", - "israel\n", - "china\n", - "cuba\n", - "indonesia\n", - "jordan\n", - "burma\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf.entity_word_cloud()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "embassy\n", - "commonbloc1\n", - "relngo\n", - "relintergovorgs\n", - "intergovorgs3\n", - "timesinceally\n", - "ngoorgs3\n", - "intergovorgs\n", - "reldiplomacy\n", - "independence\n", - "conferences\n", - "ngo\n", - "weightedunvote\n", - "blockpositionindex\n", - "commonbloc2\n", - "treaties\n", - "reltreaties\n", - "unweightedunvote\n", - "relexports\n", - "negativebehavior\n", - "exports3\n", - "commonbloc0\n", - "booktranslations\n", - "officialvisits\n", - "relbooktranslations\n", - "tourism\n", - "accusation\n", - "timesincewar\n", - "reltourism\n", - "pprotests\n", - "militaryalliance\n", - "nonviolentbehavior\n", - "negativecomm\n", - "tourism3\n", - "students\n", - "exportbooks\n", - "relexportbooks\n", - "releconomicaid\n", - "relstudents\n", - "economicaid\n", - "eemigrants\n", - "boycottembargo\n", - "emigrants3\n", - "duration\n", - "militaryactions\n", - "violentactions\n", - "relemigrants\n", - "dependent\n", - "expeldiplomats\n", - "unoffialacts\n", - "warning\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf.relation_word_cloud()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "testing_mapped_triples = Nations().testing.mapped_triples.to(model.device)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tail_idtail_labelscorein_trainingin_testing
55india-2.114373TrueFalse
1212usa-2.119678TrueFalse
99netherlands-2.138442TrueFalse
1111uk-2.192534TrueFalse
44egypt-2.437390TrueFalse
66indonesia-2.617149TrueFalse
1010poland-2.621141FalseTrue
77israel-2.652526TrueFalse
33cuba-2.981478FalseTrue
1313ussr-3.058180FalseFalse
00brazil-3.211353FalseFalse
22china-3.242816FalseFalse
11burma-3.261366FalseFalse
88jordan-3.528769FalseFalse
\n", - "
" - ], - "text/plain": [ - " tail_id tail_label score in_training in_testing\n", - "5 5 india -2.114373 True False\n", - "12 12 usa -2.119678 True False\n", - "9 9 netherlands -2.138442 True False\n", - "11 11 uk -2.192534 True False\n", - "4 4 egypt -2.437390 True False\n", - "6 6 indonesia -2.617149 True False\n", - "10 10 poland -2.621141 False True\n", - "7 7 israel -2.652526 True False\n", - "3 3 cuba -2.981478 False True\n", - "13 13 ussr -3.058180 False False\n", - "0 0 brazil -3.211353 False False\n", - "2 2 china -3.242816 False False\n", - "1 1 burma -3.261366 False False\n", - "8 8 jordan -3.528769 False False" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Who do we predict brazil participates in inter-governmental organizations with?\n", - "model.get_tail_prediction_df(\"brazil\", \"intergovorgs\", triples_factory=tf, testing=testing_mapped_triples)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:pykeen.models.predict:Since remove_known is enabled, will not add novelty column\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tail_idtail_labelscore
1313ussr-3.058180
00brazil-3.211353
22china-3.242816
11burma-3.261366
88jordan-3.528769
\n", - "
" - ], - "text/plain": [ - " tail_id tail_label score\n", - "13 13 ussr -3.058180\n", - "0 0 brazil -3.211353\n", - "2 2 china -3.242816\n", - "1 1 burma -3.261366\n", - "8 8 jordan -3.528769" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Automatically filter out non-novel predictions (e.g. in training or in testing is True)\n", - "model.get_tail_prediction_df(\n", - " \"brazil\", \"intergovorgs\", triples_factory=tf, testing=testing_mapped_triples, remove_known=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
head_idhead_labelscorein_trainingin_testing
1010poland-2.129169TrueFalse
33cuba-2.224976TrueFalse
1313ussr-2.242457TrueFalse
1212usa-2.284824TrueFalse
55india-2.410077TrueFalse
1111uk-2.813856FalseFalse
66indonesia-2.837616FalseFalse
22china-2.893122FalseFalse
44egypt-2.944993FalseFalse
99netherlands-2.984741FalseFalse
77israel-3.278340FalseFalse
11burma-3.328085FalseFalse
88jordan-3.355240FalseFalse
00brazil-3.428868FalseFalse
\n", - "
" - ], - "text/plain": [ - " head_id head_label score in_training in_testing\n", - "10 10 poland -2.129169 True False\n", - "3 3 cuba -2.224976 True False\n", - "13 13 ussr -2.242457 True False\n", - "12 12 usa -2.284824 True False\n", - "5 5 india -2.410077 True False\n", - "11 11 uk -2.813856 False False\n", - "6 6 indonesia -2.837616 False False\n", - "2 2 china -2.893122 False False\n", - "4 4 egypt -2.944993 False False\n", - "9 9 netherlands -2.984741 False False\n", - "7 7 israel -3.278340 False False\n", - "1 1 burma -3.328085 False False\n", - "8 8 jordan -3.355240 False False\n", - "0 0 brazil -3.428868 False False" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Who do we predict to have a conference with brazil?\n", - "model.get_head_prediction_df(\"conferences\", \"brazil\", triples_factory=tf, testing=testing_mapped_triples)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:pykeen.models.predict:predict is an expensive operation, involving 10,780 score evaluations.\n", - "WARNING:pykeen.models.predict:Not providing k to `predict` entails huge memory requirements for reasonably-sized knowledge graphs.\n" - ] - }, - { - "data": { - "application/json": { - "ascii": false, - "bar_format": null, - "colour": null, - "elapsed": 0.01923513412475586, - "initial": 0, - "n": 0, - "ncols": null, - "nrows": 25, - "postfix": null, - "prefix": "scoring", - "rate": null, - "total": 770, - "unit": "batch", - "unit_divisor": 1000, - "unit_scale": true - }, - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "scoring: 0%| | 0.00/770 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
head_idhead_labelrelation_idrelation_labeltail_idtail_labelscorein_trainingin_testing
05india2attackembassy6indonesia-1.138494TrueFalse
111uk24militaryalliance9netherlands-1.231564TrueFalse
27israel22lostterritory8jordan-1.267689TrueFalse
310poland22lostterritory13ussr-1.276796TrueFalse
413ussr6commonbloc012usa-1.294233TrueFalse
..............................
1077512usa13eemigrants1burma-4.371924FalseFalse
107765india19independence5india-4.388505FalseFalse
107772china27ngo2china-4.428998FalseFalse
107780brazil12economicaid13ussr-4.489155FalseFalse
107791burma19independence1burma-4.633777FalseFalse
\n", - "

10780 rows × 9 columns

\n", - "" - ], - "text/plain": [ - " head_id head_label relation_id relation_label tail_id \\\n", - "0 5 india 2 attackembassy 6 \n", - "1 11 uk 24 militaryalliance 9 \n", - "2 7 israel 22 lostterritory 8 \n", - "3 10 poland 22 lostterritory 13 \n", - "4 13 ussr 6 commonbloc0 12 \n", - "... ... ... ... ... ... \n", - "10775 12 usa 13 eemigrants 1 \n", - "10776 5 india 19 independence 5 \n", - "10777 2 china 27 ngo 2 \n", - "10778 0 brazil 12 economicaid 13 \n", - "10779 1 burma 19 independence 1 \n", - "\n", - " tail_label score in_training in_testing \n", - "0 indonesia -1.138494 True False \n", - "1 netherlands -1.231564 True False \n", - "2 jordan -1.267689 True False \n", - "3 ussr -1.276796 True False \n", - "4 usa -1.294233 True False \n", - "... ... ... ... ... \n", - "10775 burma -4.371924 False False \n", - "10776 india -4.388505 False False \n", - "10777 china -4.428998 False False \n", - "10778 ussr -4.489155 False False \n", - "10779 burma -4.633777 False False \n", - "\n", - "[10780 rows x 9 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Score all triples\n", - "model.get_all_prediction_df(triples_factory=tf, testing=testing_mapped_triples)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/src/pykeen/ablation/ablation.py b/src/pykeen/ablation/ablation.py index 4272062ac3..94310f9537 100644 --- a/src/pykeen/ablation/ablation.py +++ b/src/pykeen/ablation/ablation.py @@ -36,7 +36,7 @@ def ablation_pipeline( training_loops: Union[str, List[str]], *, epochs: Optional[int] = None, - create_inverse_triples: Union[bool, List[bool]] = False, + use_inverse_relations: Union[bool, List[bool]] = False, regularizers: Union[None, str, List[str]] = None, negative_sampler: Union[str, None] = None, evaluator: Optional[str] = None, @@ -80,7 +80,7 @@ def ablation_pipeline( :param optimizers: An optimizer name or list of optimizer names. :param training_loops: A training loop name or list of training loop names. :param epochs: A quick way to set the ``num_epochs`` in the training kwargs. - :param create_inverse_triples: Either a boolean for a single entry or a list of booleans. + :param use_inverse_relations: Either a boolean for a single entry or a list of booleans. :param regularizers: A regularizer name, list of regularizer names, or None if no regularizer is desired. :param negative_sampler: A negative sampler name, list of regularizer names, or None if no negative sampler is desired. Negative sampling is used only in combination with :class:`pykeen.training.SLCWATrainingLoop`. @@ -148,7 +148,7 @@ def ablation_pipeline( optimizers=optimizers, training_loops=training_loops, epochs=epochs, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, regularizers=regularizers, model_to_model_kwargs=model_to_model_kwargs, model_to_model_kwargs_ranges=model_to_model_kwargs_ranges, @@ -324,7 +324,7 @@ def prepare_ablation( # noqa:C901 directory: Union[str, pathlib.Path], *, epochs: Optional[int] = None, - create_inverse_triples: Union[bool, List[bool]] = False, + use_inverse_relations: Union[bool, List[bool]] = False, regularizers: Union[None, str, List[str], List[None]] = None, negative_sampler: Optional[str] = None, evaluator: Optional[str] = None, @@ -362,7 +362,7 @@ def prepare_ablation( # noqa:C901 :param optimizers: An optimizer name or list of optimizer names. :param training_loops: A training loop name or list of training loop names. :param epochs: A quick way to set the ``num_epochs`` in the training kwargs. - :param create_inverse_triples: Either a boolean for a single entry or a list of booleans. + :param use_inverse_relations: Either a boolean for a single entry or a list of booleans. :param regularizers: A regularizer name, list of regularizer names, or None if no regularizer is desired. :param negative_sampler: A negative sampler name, list of regularizer names, or None if no negative sampler is desired. Negative sampling is used only in combination with the pykeen.training.sclwa training loop. @@ -423,8 +423,8 @@ def prepare_ablation( # noqa:C901 directory = normalize_path(path=directory) if isinstance(datasets, str): datasets = [datasets] - if isinstance(create_inverse_triples, bool): - create_inverse_triples = [create_inverse_triples] + if isinstance(use_inverse_relations, bool): + use_inverse_relations = [use_inverse_relations] if isinstance(models, str): models = [models] if isinstance(losses, str): @@ -440,7 +440,7 @@ def prepare_ablation( # noqa:C901 it = itt.product( datasets, - create_inverse_triples, + use_inverse_relations, models, losses, regularizers, @@ -456,7 +456,7 @@ def prepare_ablation( # noqa:C901 directories = [] for counter, ( dataset, - create_inverse_triples, + use_inverse_relations, model, loss, regularizer, @@ -515,8 +515,8 @@ def _set_arguments(config: Optional[Mapping3D], key: str, value: str) -> None: "the paths to the training, testing, and validation data.", ) logger.info(f"Dataset: {dataset}") - hpo_config["dataset_kwargs"] = dict(create_inverse_triples=create_inverse_triples) - logger.info(f"Add inverse triples: {create_inverse_triples}") + hpo_config["dataset_kwargs"] = dict(use_inverse_relations=use_inverse_relations) + logger.info(f"Add inverse triples: {use_inverse_relations}") hpo_config["model"] = model hpo_config["model_kwargs"] = model_to_model_kwargs.get(model, {}) diff --git a/src/pykeen/contrib/lightning.py b/src/pykeen/contrib/lightning.py index f6cc2bf22e..3c3228bd3e 100644 --- a/src/pykeen/contrib/lightning.py +++ b/src/pykeen/contrib/lightning.py @@ -13,7 +13,7 @@ model = LitLCWAModule( dataset="fb15k237", - dataset_kwargs=dict(create_inverse_triples=True), + dataset_kwargs=dict(use_inverse_relations=True), model="mure", model_kwargs=dict(embedding_dim=128, loss="bcewithlogits"), batch_size=128, @@ -306,7 +306,7 @@ def lit_pipeline( @click.command() @lit_module_resolver.get_option("-tl", "--training-loop") @dataset_resolver.get_option("--dataset", default="nations") -@options.inverse_triples_option +@options.use_inverse_relations_option @model_resolver.get_option("-m", "--model", default="mure") @loss_resolver.get_option("-l", "--loss", default="bcewithlogits") @options.batch_size_option @@ -317,7 +317,7 @@ def lit_pipeline( def _main( training_loop: HintOrType[LitModule], dataset: HintOrType[Dataset], - create_inverse_triples: bool, + use_inverse_relations: bool, model: HintOrType[Model], loss: HintOrType[Loss], batch_size: int, @@ -330,9 +330,8 @@ def _main( training_loop=training_loop, training_loop_kwargs=dict( dataset=dataset, - dataset_kwargs=dict(create_inverse_triples=create_inverse_triples), model=model, - model_kwargs=dict(embedding_dim=embedding_dim, loss=loss), + model_kwargs=dict(embedding_dim=embedding_dim, loss=loss, use_inverse_relations=use_inverse_relations), batch_size=batch_size, ), trainer_kwargs=dict( diff --git a/src/pykeen/datasets/base.py b/src/pykeen/datasets/base.py index a986dce58a..0418eb6c00 100644 --- a/src/pykeen/datasets/base.py +++ b/src/pykeen/datasets/base.py @@ -90,7 +90,7 @@ def __eq__(self, __o: object) -> bool: # noqa: D105 and (self.training == __o.training) and (self.testing == __o.testing) and ((self.validation is None and __o.validation is None) or (self.validation == __o.validation)) - and (self.create_inverse_triples == __o.create_inverse_triples) + and (self.use_inverse_relations == __o.use_inverse_relations) ) @property @@ -129,9 +129,9 @@ def num_relations(self): # noqa: D401 return self.training.num_relations @property - def create_inverse_triples(self): + def use_inverse_relations(self): """Return whether inverse triples are created *for the training factory*.""" - return self.training.create_inverse_triples + return self.training.use_inverse_relations @classmethod def docdata(cls, *parts: str) -> Any: @@ -165,7 +165,7 @@ def summary_str(self, title: Optional[str] = None, show_examples: Optional[int] n_triples = sum(count for *_, count in rows) rows.append(("Total", "-", "-", n_triples)) t = tabulate(rows, headers=["Name", "Entities", "Relations", "Triples"]) - rv = f"{title or self.__class__.__name__} (create_inverse_triples={self.create_inverse_triples})\n{t}" + rv = f"{title or self.__class__.__name__} (use_inverse_relations={self.use_inverse_relations})\n{t}" if show_examples: if not isinstance(self.training, TriplesFactory): raise AttributeError(f"{self.training.__class__} does not have labeling information.") @@ -184,7 +184,7 @@ def iter_extra_repr(self) -> Iterable[str]: """Yield extra entries for the instance's string representation.""" yield f"num_entities={self.num_entities}" yield f"num_relations={self.num_relations}" - yield f"create_inverse_triples={self.create_inverse_triples}" + yield f"use_inverse_relations={self.use_inverse_relations}" @classmethod def from_path(cls, path: Union[str, pathlib.Path], ratios: Optional[List[float]] = None) -> "Dataset": @@ -394,7 +394,7 @@ def __init__( testing_path: Union[str, pathlib.Path], validation_path: Union[None, str, pathlib.Path], eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, load_triples_kwargs: Optional[Mapping[str, Any]] = None, ) -> None: """Initialize the dataset. @@ -403,7 +403,7 @@ def __init__( :param testing_path: Path to the testing triples file or testing triples file. :param validation_path: Path to the validation triples file or validation triples file. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param load_triples_kwargs: Arguments to pass through to :func:`TriplesFactory.from_path` and ultimately through to :func:`pykeen.triples.utils.load_triples`. """ @@ -411,7 +411,7 @@ def __init__( self.testing_path = pathlib.Path(testing_path) self.validation_path = pathlib.Path(validation_path) if validation_path else None - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations self.load_triples_kwargs = load_triples_kwargs if eager: @@ -421,7 +421,7 @@ def __init__( def _load(self) -> None: self._training = TriplesFactory.from_path( path=self.training_path, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, load_triples_kwargs=self.load_triples_kwargs, ) self._testing = TriplesFactory.from_path( @@ -429,7 +429,7 @@ def _load(self) -> None: entity_to_id=self._training.entity_to_id, # share entity index with training relation_to_id=self._training.relation_to_id, # share relation index with training # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=False, + use_inverse_relations=False, load_triples_kwargs=self.load_triples_kwargs, ) @@ -445,7 +445,7 @@ def _load_validation(self) -> None: entity_to_id=self._training.entity_to_id, # share entity index with training relation_to_id=self._training.relation_to_id, # share relation index with training # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=False, + use_inverse_relations=False, load_triples_kwargs=self.load_triples_kwargs, ) @@ -467,7 +467,7 @@ def __init__( cache_root: Optional[str] = None, force: bool = False, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, load_triples_kwargs: Optional[Mapping[str, Any]] = None, download_kwargs: Optional[Mapping[str, Any]] = None, ): @@ -481,7 +481,7 @@ def __init__( This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.data/pykeen``. :param force: If true, redownload any cached files :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param load_triples_kwargs: Arguments to pass through to :func:`TriplesFactory.from_path` and ultimately through to :func:`pykeen.triples.utils.load_triples`. :param download_kwargs: Keyword arguments to pass to :func:`pystow.utils.download` @@ -512,7 +512,7 @@ def __init__( testing_path=testing_path, validation_path=validation_path, eager=eager, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, load_triples_kwargs=load_triples_kwargs, ) @@ -528,7 +528,7 @@ def __init__( relative_validation_path: Union[str, pathlib.PurePath], cache_root: Optional[str] = None, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, ): """Initialize dataset. @@ -541,7 +541,7 @@ def __init__( An optional directory to store the extracted files. Is none is given, the default PyKEEN directory is used. This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.data/pykeen``. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. """ self.cache_root = self._help_cache(cache_root) @@ -556,7 +556,7 @@ def __init__( testing_path=testing_path, validation_path=validation_path, eager=eager, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, ) def _get_paths(self) -> Tuple[pathlib.Path, pathlib.Path, pathlib.Path]: # noqa: D401 @@ -617,7 +617,7 @@ def __init__( name: Optional[str] = None, cache_root: Optional[str] = None, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, ): """Initialize dataset. @@ -632,7 +632,7 @@ def __init__( An optional directory to store the extracted files. Is none is given, the default PyKEEN directory is used. This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.pykeen``. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :raises ValueError: if there's no URL specified and there is no data already at the calculated path """ @@ -649,7 +649,7 @@ def __init__( self.relative_training_path = pathlib.PurePath(relative_training_path) self.relative_testing_path = pathlib.PurePath(relative_testing_path) self.relative_validation_path = pathlib.PurePath(relative_validation_path) - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations if eager: self._load() self._load_validation() @@ -695,7 +695,7 @@ def _load_helper( ) return TriplesFactory.from_labeled_triples( triples=df.values, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, metadata={"path": relative_path}, entity_to_id=entity_to_id, relation_to_id=relation_to_id, @@ -714,7 +714,7 @@ def __init__( name: Optional[str] = None, cache_root: Optional[str] = None, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, delimiter: Optional[str] = None, random_state: TorchRandomHint = None, ): @@ -729,7 +729,7 @@ def __init__( :param cache_root: An optional directory to store the extracted files. Is none is given, the default PyKEEN directory is used. This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.pykeen``. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param eager: Should the data be loaded eagerly? Defaults to false. :param random_state: An optional random state to make the training/testing/validation split reproducible. :param delimiter: @@ -741,7 +741,7 @@ def __init__( self.random_state = random_state self.delimiter = delimiter or "\t" self.url = url - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations self._relative_path = pathlib.PurePosixPath(relative_path) if eager: @@ -755,7 +755,7 @@ def _load(self) -> None: tf_path = self._get_path() tf = TriplesFactory.from_labeled_triples( triples=df.values, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, metadata={"path": tf_path}, ) self._training, self._testing, self._validation = cast( @@ -822,7 +822,7 @@ def __init__( self, cache_root: Optional[str] = None, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, random_state: TorchRandomHint = None, ): """Initialize dataset. @@ -831,14 +831,14 @@ def __init__( An optional directory to store the extracted files. Is none is given, the default PyKEEN directory is used. This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.pykeen``. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param random_state: An optional random state to make the training/testing/validation split reproducible. """ self.cache_root = self._help_cache(cache_root) self._triples_factory = None self.random_state = random_state - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations self._training = None self._testing = None self._validation = None @@ -857,7 +857,7 @@ def _load(self) -> None: path = self._get_path() tf = TriplesFactory.from_labeled_triples( triples=df.values, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, metadata=dict(path=path) if path else None, ) self._training, self._testing, self._validation = cast( @@ -887,7 +887,7 @@ def __init__( name: Optional[str] = None, cache_root: Optional[str] = None, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, random_state: TorchRandomHint = None, download_kwargs: Optional[Dict[str, Any]] = None, read_csv_kwargs: Optional[Dict[str, Any]] = None, @@ -902,7 +902,7 @@ def __init__( An optional directory to store the extracted files. Is none is given, the default PyKEEN directory is used. This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.pykeen``. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param random_state: An optional random state to make the training/testing/validation split reproducible. :param download_kwargs: Keyword arguments to pass through to :func:`pystow.utils.download`. :param read_csv_kwargs: Keyword arguments to pass through to :func:`pandas.read_csv`. @@ -911,7 +911,7 @@ def __init__( """ super().__init__( cache_root=cache_root, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, random_state=random_state, eager=False, # because it gets hooked below ) diff --git a/src/pykeen/datasets/ea/base.py b/src/pykeen/datasets/ea/base.py index b40e04af2e..ae2c0571a5 100644 --- a/src/pykeen/datasets/ea/base.py +++ b/src/pykeen/datasets/ea/base.py @@ -30,7 +30,7 @@ def __init__( self, *, side: Optional[EASide] = EA_SIDE_LEFT, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, random_state: TorchRandomHint = 0, split_ratios: Tuple[float, float, float] = (0.8, 0.1, 0.1), combination: HintOrType[GraphPairCombinator] = None, @@ -43,7 +43,7 @@ def __init__( :param side: the side, if only a single graph should be considered, or `None` to combine the two graphs into a single one, using `combination`. - :param create_inverse_triples: + :param use_inverse_relations: whether to create inverse triples. :param random_state: the random state to use for reproducible splits @@ -86,7 +86,7 @@ def __init__( # split training, testing, validation = tf.split(ratios=split_ratios, random_state=random_state) # create inverse triples only for training - training.create_inverse_triples = create_inverse_triples + training.use_inverse_relations = use_inverse_relations super().__init__(training=training, testing=testing, validation=validation, **kwargs) @abstractmethod diff --git a/src/pykeen/datasets/inductive/base.py b/src/pykeen/datasets/inductive/base.py index 1cf11eb661..83dd77886b 100644 --- a/src/pykeen/datasets/inductive/base.py +++ b/src/pykeen/datasets/inductive/base.py @@ -42,7 +42,7 @@ class InductiveDataset: #: A factory wrapping the validation triples, that share indices with the INDUCTIVE INFERENCE triples inductive_validation: Optional[CoreTriplesFactory] = None #: All datasets should take care of inverse triple creation - create_inverse_triples: bool = True + use_inverse_relations: bool = True def _summary_rows(self): return [ @@ -64,7 +64,7 @@ def summary_str(self, title: Optional[str] = None, show_examples: Optional[int] n_triples = sum(count for *_, count in rows) rows.append(("Total", "-", "-", n_triples)) t = tabulate(rows, headers=["Name", "Entities", "Relations", "Triples"]) - rv = f"{title or self.__class__.__name__} (create_inverse_triples={self.create_inverse_triples})\n{t}" + rv = f"{title or self.__class__.__name__} (use_inverse_relations={self.use_inverse_relations})\n{t}" if show_examples: if not isinstance(self.transductive_training, TriplesFactory): raise AttributeError(f"{self.transductive_training.__class__} does not have labeling information.") @@ -94,7 +94,7 @@ class EagerInductiveDataset(InductiveDataset): inductive_inference: CoreTriplesFactory inductive_testing: CoreTriplesFactory inductive_validation: Optional[CoreTriplesFactory] = None - create_inverse_triples: bool = True + use_inverse_relations: bool = True class LazyInductiveDataset(InductiveDataset): @@ -203,7 +203,7 @@ def __init__( inductive_testing_path: Union[str, pathlib.Path], inductive_validation_path: Union[str, str, pathlib.Path], eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, load_triples_kwargs: Optional[Mapping[str, Any]] = None, ) -> None: """Initialize the dataset. @@ -213,7 +213,7 @@ def __init__( :param inductive_testing_path: Path to the testing triples file or testing triples file. :param inductive_validation_path: Path to the validation triples file or validation triples file. :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param load_triples_kwargs: Arguments to pass through to :func:`TriplesFactory.from_path` and ultimately through to :func:`pykeen.triples.utils.load_triples`. """ @@ -222,7 +222,7 @@ def __init__( self.inductive_testing_path = pathlib.Path(inductive_testing_path) self.inductive_validation_path = pathlib.Path(inductive_validation_path) - self.create_inverse_triples = create_inverse_triples + self.use_inverse_relations = use_inverse_relations self.load_triples_kwargs = load_triples_kwargs if eager: @@ -231,14 +231,14 @@ def __init__( def _load(self) -> None: self._transductive_training = TriplesFactory.from_path( path=self.transductive_training_path, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, load_triples_kwargs=self.load_triples_kwargs, ) # important: inductive_inference shares the same RELATIONS with the transductive training graph self._inductive_inference = TriplesFactory.from_path( path=self.inductive_inference_path, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, relation_to_id=self._transductive_training.relation_to_id, load_triples_kwargs=self.load_triples_kwargs, ) @@ -249,7 +249,7 @@ def _load(self) -> None: entity_to_id=self._inductive_inference.entity_to_id, # shares entity index with inductive inference relation_to_id=self._inductive_inference.relation_to_id, # shares relation index with inductive inference # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=False, + use_inverse_relations=False, load_triples_kwargs=self.load_triples_kwargs, ) @@ -259,7 +259,7 @@ def _load(self) -> None: entity_to_id=self._inductive_inference.entity_to_id, # share entity index with inductive inference relation_to_id=self._inductive_inference.relation_to_id, # share relation index with inductive inference # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=False, + use_inverse_relations=False, load_triples_kwargs=self.load_triples_kwargs, ) @@ -284,7 +284,7 @@ def __init__( cache_root: Optional[str] = None, force: bool = False, eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, load_triples_kwargs: Optional[Mapping[str, Any]] = None, download_kwargs: Optional[Mapping[str, Any]] = None, version: Optional[str] = None, @@ -300,7 +300,7 @@ def __init__( This is defined either by the environment variable ``PYKEEN_HOME`` or defaults to ``~/.data/pykeen``. :param force: If true, redownload any cached files :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. :param load_triples_kwargs: Arguments to pass through to :func:`TriplesFactory.from_path` and ultimately through to :func:`pykeen.triples.utils.load_triples`. :param download_kwargs: Keyword arguments to pass to :func:`pystow.utils.download` @@ -336,6 +336,6 @@ def __init__( inductive_testing_path=inductive_testing_path, inductive_validation_path=inductive_validation_path, eager=eager, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, load_triples_kwargs=load_triples_kwargs, ) diff --git a/src/pykeen/datasets/inductive/ilpc2022.py b/src/pykeen/datasets/inductive/ilpc2022.py index 32b408655c..74be29ab6e 100644 --- a/src/pykeen/datasets/inductive/ilpc2022.py +++ b/src/pykeen/datasets/inductive/ilpc2022.py @@ -46,7 +46,7 @@ def __init__(self, **kwargs): inductive_inference_url=SMALL_INFERENCE_URL, inductive_validation_url=SMALL_INFERENCE_VAL_URL, inductive_testing_url=SMALL_INFERENCE_TEST_URL, - create_inverse_triples=True, + use_inverse_relations=True, eager=True, **kwargs, ) @@ -75,7 +75,7 @@ def __init__(self, **kwargs): inductive_inference_url=LARGE_INFERENCE_URL, inductive_validation_url=LARGE_INFERENCE_VAL_URL, inductive_testing_url=LARGE_INFERENCE_TEST_URL, - create_inverse_triples=True, + use_inverse_relations=True, eager=True, **kwargs, ) diff --git a/src/pykeen/datasets/literal_base.py b/src/pykeen/datasets/literal_base.py index 1a908efc8d..4f966a8c2c 100644 --- a/src/pykeen/datasets/literal_base.py +++ b/src/pykeen/datasets/literal_base.py @@ -25,7 +25,7 @@ def __init__( validation_path: Union[str, pathlib.Path, TextIO], literals_path: Union[str, pathlib.Path, TextIO], eager: bool = False, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, ) -> None: """Initialize the dataset. @@ -34,14 +34,14 @@ def __init__( :param validation_path: Path to the validation triples file or validation triples file. :param literals_path: Path to the literals triples file or literal triples file :param eager: Should the data be loaded eagerly? Defaults to false. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. """ self.training_path = training_path self.testing_path = testing_path self.validation_path = validation_path self.literals_path = literals_path - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations if eager: self._load() @@ -51,7 +51,7 @@ def _load(self) -> None: self._training = self.triples_factory_cls.from_path( path=self.training_path, path_to_numeric_triples=self.literals_path, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, ) self._testing = self.triples_factory_cls.from_path( path=self.testing_path, diff --git a/src/pykeen/datasets/mocks.py b/src/pykeen/datasets/mocks.py index 391e0283f8..9e20efa35b 100644 --- a/src/pykeen/datasets/mocks.py +++ b/src/pykeen/datasets/mocks.py @@ -15,7 +15,7 @@ def create_inductive_dataset( num_triples_inference: int, num_triples_testing: int, random_state: int = 42, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, # num_triples_validation: Optional[int], ) -> InductiveDataset: """ @@ -33,7 +33,7 @@ def create_inductive_dataset( the number of (inductive) inference triples. defaults to `num_triples_training` :param num_triples_testing: the number of (inductive) testing triples. defaults to `num_triples_training` - :param create_inverse_triples: + :param use_inverse_relations: whether to create inverse triples :param random_state: the random state to use. @@ -46,22 +46,22 @@ def create_inductive_dataset( num_entities=num_entities_transductive, num_relations=num_relations, num_triples=num_triples_training, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, random_state=random_state, ), inductive_inference=generate_triples_factory( num_entities=num_entities_inductive, num_relations=num_relations, num_triples=num_triples_inference, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, random_state=random_state + 1, # different random states for different triples ), inductive_testing=generate_triples_factory( num_entities=num_entities_inductive, num_relations=num_relations, num_triples=num_triples_testing, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, random_state=random_state + 2, # different random states for different triples ), - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, ) diff --git a/src/pykeen/datasets/ogb.py b/src/pykeen/datasets/ogb.py index c25e86a965..aa2bea9ed3 100644 --- a/src/pykeen/datasets/ogb.py +++ b/src/pykeen/datasets/ogb.py @@ -28,15 +28,15 @@ class OGBLoader(LazyDataset): #: The name of the dataset to download name: ClassVar[str] - def __init__(self, cache_root: Optional[str] = None, create_inverse_triples: bool = False): + def __init__(self, cache_root: Optional[str] = None, use_inverse_relations: bool = False): """Initialize the OGB loader. :param cache_root: An optional override for where data should be cached. If not specified, uses default PyKEEN location with :mod:`pystow`. - :param create_inverse_triples: Should inverse triples be created? Defaults to false. + :param use_inverse_relations: Should inverse triples be created? Defaults to false. """ self.cache_root = self._help_cache(cache_root) - self._create_inverse_triples = create_inverse_triples + self._use_inverse_relations = use_inverse_relations def _load(self) -> None: try: @@ -77,7 +77,7 @@ def _make_tf(self, x, entity_to_id=None, relation_to_id=None): return TriplesFactory.from_labeled_triples( triples=triples, - create_inverse_triples=self._create_inverse_triples, + use_inverse_relations=self._use_inverse_relations, entity_to_id=entity_to_id, relation_to_id=relation_to_id, ) @@ -113,7 +113,7 @@ def _make_tf(self, x, entity_to_id=None, relation_to_id=None): return TriplesFactory.from_labeled_triples( triples=triples, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, entity_to_id=entity_to_id, relation_to_id=relation_to_id, ) diff --git a/src/pykeen/datasets/utils.py b/src/pykeen/datasets/utils.py index 4a78fe667c..571a440346 100644 --- a/src/pykeen/datasets/utils.py +++ b/src/pykeen/datasets/utils.py @@ -181,14 +181,14 @@ def _digest_kwargs(dataset_kwargs: Mapping[str, Any], ignore: Collection[str] = return base64.urlsafe_b64encode(digester.digest()).decode("utf8")[:32] -def _set_inverse_triples_(dataset_instance: Dataset, create_inverse_triples: bool) -> Dataset: - # note: we only need to set the create_inverse_triples in the training factory. - if dataset_instance.create_inverse_triples and not create_inverse_triples: +def _set_inverse_triples_(dataset_instance: Dataset, use_inverse_relations: bool) -> Dataset: + # note: we only need to set the use_inverse_relations in the training factory. + if dataset_instance.use_inverse_relations and not use_inverse_relations: assert dataset_instance.training.num_relations % 2 == 0 dataset_instance.training.num_relations //= 2 - elif create_inverse_triples and not dataset_instance.training.create_inverse_triples: + elif use_inverse_relations and not dataset_instance.training.use_inverse_relations: dataset_instance.training.num_relations *= 2 - dataset_instance.training.create_inverse_triples = create_inverse_triples + dataset_instance.training.use_inverse_relations = use_inverse_relations return dataset_instance @@ -207,7 +207,7 @@ def _cached_get_dataset( force = force or dataset_kwargs.pop("force", False) # hash kwargs - digest = _digest_kwargs(dataset_kwargs, ignore={"create_inverse_triples"}) + digest = _digest_kwargs(dataset_kwargs, ignore={"use_inverse_relations"}) # normalize dataset name dataset_cls = dataset_resolver.lookup(dataset) @@ -221,7 +221,7 @@ def _cached_get_dataset( logger.info(f"Loading cached preprocessed dataset from {path.as_uri()}") return _set_inverse_triples_( dataset_cls.from_directory_binary(path), - create_inverse_triples=dataset_kwargs.get("create_inverse_triples", False), + use_inverse_relations=dataset_kwargs.get("use_inverse_relations", False), ) # load dataset without cache diff --git a/src/pykeen/experiments/boxe/abboud2020_boxe_yago310.yaml b/src/pykeen/experiments/boxe/abboud2020_boxe_yago310.yaml index 3a9ec623b6..b0ddc7a567 100644 --- a/src/pykeen/experiments/boxe/abboud2020_boxe_yago310.yaml +++ b/src/pykeen/experiments/boxe/abboud2020_boxe_yago310.yaml @@ -6,7 +6,7 @@ pipeline: dataset: yago310 # introduce inverse relations and reciprocal facts dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true # optimization is using negative sampling self-adversarial loss. diff --git a/src/pykeen/experiments/conve/dettmers2018_conve_fb15k.json b/src/pykeen/experiments/conve/dettmers2018_conve_fb15k.json index 94cf40da25..6154c1ecdd 100644 --- a/src/pykeen/experiments/conve/dettmers2018_conve_fb15k.json +++ b/src/pykeen/experiments/conve/dettmers2018_conve_fb15k.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "fb15k", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "ConvE", "model_kwargs": { diff --git a/src/pykeen/experiments/conve/dettmers2018_conve_fb15k237.json b/src/pykeen/experiments/conve/dettmers2018_conve_fb15k237.json index 35f4c9cedc..40ab04c1f7 100644 --- a/src/pykeen/experiments/conve/dettmers2018_conve_fb15k237.json +++ b/src/pykeen/experiments/conve/dettmers2018_conve_fb15k237.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "fb15k237", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "ConvE", "model_kwargs": { diff --git a/src/pykeen/experiments/conve/dettmers2018_conve_wn18.json b/src/pykeen/experiments/conve/dettmers2018_conve_wn18.json index 66e7c53215..9f15987730 100644 --- a/src/pykeen/experiments/conve/dettmers2018_conve_wn18.json +++ b/src/pykeen/experiments/conve/dettmers2018_conve_wn18.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "wn18", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "ConvE", "model_kwargs": { diff --git a/src/pykeen/experiments/conve/dettmers2018_conve_wn18rr.json b/src/pykeen/experiments/conve/dettmers2018_conve_wn18rr.json index 9437a5e0ab..79ebc989ed 100644 --- a/src/pykeen/experiments/conve/dettmers2018_conve_wn18rr.json +++ b/src/pykeen/experiments/conve/dettmers2018_conve_wn18rr.json @@ -7,7 +7,7 @@ "dataset": "wn18rr", "model": "ConvE", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model_kwargs": { "embedding_dim": 200, diff --git a/src/pykeen/experiments/ermlp/ali2020_ermlp_fb15k237.yaml b/src/pykeen/experiments/ermlp/ali2020_ermlp_fb15k237.yaml index 41508e8f6f..db6110680f 100644 --- a/src/pykeen/experiments/ermlp/ali2020_ermlp_fb15k237.yaml +++ b/src/pykeen/experiments/ermlp/ali2020_ermlp_fb15k237.yaml @@ -4,7 +4,7 @@ metadata: pipeline: dataset: fb15k237 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: bceaftersigmoid diff --git a/src/pykeen/experiments/ermlp/ali2020_ermlp_wn18rr.yaml b/src/pykeen/experiments/ermlp/ali2020_ermlp_wn18rr.yaml index 2ff0733336..e1975c6b67 100644 --- a/src/pykeen/experiments/ermlp/ali2020_ermlp_wn18rr.yaml +++ b/src/pykeen/experiments/ermlp/ali2020_ermlp_wn18rr.yaml @@ -4,7 +4,7 @@ metadata: pipeline: dataset: wn18rr dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: softplus diff --git a/src/pykeen/experiments/inverse_stability.py b/src/pykeen/experiments/inverse_stability.py index 56a93ca84f..2e8623140b 100644 --- a/src/pykeen/experiments/inverse_stability.py +++ b/src/pykeen/experiments/inverse_stability.py @@ -64,7 +64,7 @@ def run_inverse_stability_workflow( dataset_instance: Dataset = get_dataset( dataset=dataset, dataset_kwargs=dict( - create_inverse_triples=True, + use_inverse_relations=True, ), ) dataset_name = dataset_instance.get_normalized_name() diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl.yaml index fa3436a5e9..80a90ed077 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: codexlarge dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_noancs.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_noancs.yaml index 0022ed5c18..650a1cbd8a 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_noancs.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_noancs.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: codexlarge dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_norels.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_norels.yaml index 4ce726eb7c..b5f0678c8e 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_norels.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_codexl_norels.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: codexlarge dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237.yaml index b6c0254dec..67127979f2 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: fb15k237 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_noancs.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_noancs.yaml index 2b9353603b..f2427cb8a9 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_noancs.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_noancs.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: fb15k237 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_norels.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_norels.yaml index 97e5320563..9a93601bc3 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_norels.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_fb15k237_norels.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: fb15k237 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: BCEWithLogitsLoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr.yaml index 1e7a3a1ecb..398f56699b 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: wn18rr dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_noancs.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_noancs.yaml index ecf5d8510d..0bd571fc76 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_noancs.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_noancs.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: wn18rr dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_norels.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_norels.yaml index 64ae564e1f..a99ec9e177 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_norels.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_wn18rr_norels.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: wn18rr dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310.yaml index 3b3ffcc57f..705291f868 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: yago310 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_noancs.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_noancs.yaml index 8f19440463..c8bac8f344 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_noancs.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_noancs.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: yago310 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_norels.yaml b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_norels.yaml index 9b5c616990..f4e19bae50 100644 --- a/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_norels.yaml +++ b/src/pykeen/experiments/nodepiece/galkin2022_nodepiece_yago310_norels.yaml @@ -4,7 +4,7 @@ pipeline: random_seed: 42 dataset: yago310 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: NSSALoss diff --git a/src/pykeen/experiments/rescal/ali2020_rescal_fb15k237.yaml b/src/pykeen/experiments/rescal/ali2020_rescal_fb15k237.yaml index 40210ffc8b..ded3df1598 100644 --- a/src/pykeen/experiments/rescal/ali2020_rescal_fb15k237.yaml +++ b/src/pykeen/experiments/rescal/ali2020_rescal_fb15k237.yaml @@ -4,7 +4,7 @@ metadata: pipeline: dataset: fb15k237 dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: crossentropy diff --git a/src/pykeen/experiments/rescal/ali2020_rescal_wn18rr.yaml b/src/pykeen/experiments/rescal/ali2020_rescal_wn18rr.yaml index 4e3d8e071c..3d8ad398f8 100644 --- a/src/pykeen/experiments/rescal/ali2020_rescal_wn18rr.yaml +++ b/src/pykeen/experiments/rescal/ali2020_rescal_wn18rr.yaml @@ -4,7 +4,7 @@ metadata: pipeline: dataset: wn18rr dataset_kwargs: - create_inverse_triples: True + use_inverse_relations: True evaluator_kwargs: filtered: true loss: crossentropy diff --git a/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k.json b/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k.json index 67209e32c9..838c2856c6 100644 --- a/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k.json +++ b/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "fb15k", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "TuckER", "model_kwargs": { diff --git a/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k237.json b/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k237.json index 1fbe3bdfb4..d35cfca864 100644 --- a/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k237.json +++ b/src/pykeen/experiments/tucker/balazevic2019_tucker_fb15k237.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "fb15k237", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "TuckER", "model_kwargs": { diff --git a/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18.json b/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18.json index 49897c0809..ef092e5f78 100644 --- a/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18.json +++ b/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "wn18", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "TuckER", "model_kwargs": { diff --git a/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18rr.json b/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18rr.json index 26c7173e3e..838c7dff42 100644 --- a/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18rr.json +++ b/src/pykeen/experiments/tucker/balazevic2019_tucker_wn18rr.json @@ -6,7 +6,7 @@ "pipeline": { "dataset": "wn18rr", "dataset_kwargs": { - "create_inverse_triples": true + "use_inverse_relations": true }, "model": "TuckER", "model_kwargs": { diff --git a/src/pykeen/models/base.py b/src/pykeen/models/base.py index 784ab30e13..a8bb86a931 100644 --- a/src/pykeen/models/base.py +++ b/src/pykeen/models/base.py @@ -97,7 +97,7 @@ def __init__( else: self.loss = loss_resolver.make(loss, pos_kwargs=loss_kwargs) - self.use_inverse_triples = triples_factory.create_inverse_triples + self.use_inverse_triples = triples_factory.use_inverse_relations self.num_entities = triples_factory.num_entities self.num_relations = triples_factory.num_relations @@ -480,7 +480,7 @@ def _prepare_inverse_batch(self, batch: torch.LongTensor, index_relation: int) - if not self.use_inverse_triples: raise ValueError( "Your model is not configured to predict with inverse relations." - " Set ``create_inverse_triples=True`` when creating the dataset/triples factory" + " Set ``use_inverse_relations=True`` when creating the dataset/triples factory" " or using the pipeline().", ) return relation_inverter.invert_(batch=batch, index=index_relation).flip(1) diff --git a/src/pykeen/models/cli/builders.py b/src/pykeen/models/cli/builders.py index da0e6f0fae..79bbc118e2 100644 --- a/src/pykeen/models/cli/builders.py +++ b/src/pykeen/models/cli/builders.py @@ -123,7 +123,7 @@ def _decorate_model_kwargs(command: click.Command) -> click.Command: @options.num_workers_option @options.random_seed_option @_decorate_model_kwargs - @options.inverse_triples_option + @options.use_inverse_relations_option @click.option("--silent", is_flag=True) @click.option("--output-directory", type=pathlib.Path, default=None, help="Where to dump the results") def main( @@ -147,7 +147,7 @@ def main( num_workers, random_seed, silent: bool, - create_inverse_triples: bool, + use_inverse_relations: bool, **model_kwargs, ): """CLI for PyKEEN.""" @@ -172,18 +172,22 @@ def main( def _triples_factory(path: Optional[str]) -> Optional[TriplesFactory]: if path is None: return None - return TriplesFactory.from_path(path=path, create_inverse_triples=create_inverse_triples) + return TriplesFactory.from_path(path=path, use_inverse_relations=use_inverse_relations) training = _triples_factory(training_triples_factory) testing = _triples_factory(testing_triples_factory) validation = _triples_factory(validation_triples_factory) + if use_inverse_relations: + model_kwargs = model_kwargs or {} + model_kwargs["use_inverse_relations"] = True + pipeline_result = pipeline( device=device, model=model, model_kwargs=model_kwargs, dataset=dataset, - dataset_kwargs=dict(create_inverse_triples=create_inverse_triples), + dataset_kwargs=dict(use_inverse_relations=use_inverse_relations), training=training, testing=testing or training, validation=validation, diff --git a/src/pykeen/models/cli/options.py b/src/pykeen/models/cli/options.py index 06acc63bb0..ea91157ffa 100644 --- a/src/pykeen/models/cli/options.py +++ b/src/pykeen/models/cli/options.py @@ -182,4 +182,6 @@ def _callback(_, __, value): show_default=True, help="Random seed for PyTorch, NumPy, and Python.", ) -inverse_triples_option = click.option("-I", "--create-inverse-triples", is_flag=True, help="Model inverse triples") +use_inverse_relations_option = click.option( + "-I", "--use-inverse-relations", is_flag=True, help="Model inverse relations" +) diff --git a/src/pykeen/models/inductive/inductive_nodepiece.py b/src/pykeen/models/inductive/inductive_nodepiece.py index 764ad55b7d..5a0c6ab9ca 100644 --- a/src/pykeen/models/inductive/inductive_nodepiece.py +++ b/src/pykeen/models/inductive/inductive_nodepiece.py @@ -64,13 +64,13 @@ def __init__( Initialize the model. :param triples_factory: - the triples factory of training triples. Must have create_inverse_triples set to True. + the triples factory of training triples. Must have use_inverse_relations set to True. :param inference_factory: - the triples factory of inference triples. Must have create_inverse_triples set to True. + the triples factory of inference triples. Must have use_inverse_relations set to True. :param validation_factory: - the triples factory of validation triples. Must have create_inverse_triples set to True. + the triples factory of validation triples. Must have use_inverse_relations set to True. :param test_factory: - the triples factory of testing triples. Must have create_inverse_triples set to True. + the triples factory of testing triples. Must have use_inverse_relations set to True. :param num_tokens: the number of relations to use to represent each entity, cf. :class:`pykeen.nn.NodePieceRepresentation`. @@ -100,7 +100,7 @@ def __init__( :raises ValueError: if the triples factory does not create inverse triples """ - if not triples_factory.create_inverse_triples: + if not triples_factory.use_inverse_relations: raise ValueError( "The provided triples factory does not create inverse triples. However, for the node piece " "representations inverse relation representations are required.", diff --git a/src/pykeen/models/unimodal/conv_e.py b/src/pykeen/models/unimodal/conv_e.py index 8e7473db5f..84636bb54b 100644 --- a/src/pykeen/models/unimodal/conv_e.py +++ b/src/pykeen/models/unimodal/conv_e.py @@ -62,7 +62,7 @@ class ConvE(ERModel): >>> # Step 1: Get triples >>> from pykeen.datasets import Nations - >>> dataset = Nations(create_inverse_triples=True) + >>> dataset = Nations(use_inverse_relations=True) >>> # Step 2: Configure the model >>> from pykeen.models import ConvE >>> model = ConvE( @@ -139,10 +139,10 @@ def __init__( ) -> None: """Initialize the model.""" # ConvE should be trained with inverse triples - if not triples_factory.create_inverse_triples: + if not triples_factory.use_inverse_relations: logger.warning( "\nThe ConvE model should be trained with inverse triples.\n" - "This can be done by defining the TriplesFactory class with the _create_inverse_triples_ parameter set " + "This can be done by defining the TriplesFactory class with the _use_inverse_relations_ parameter set " "to true.", ) diff --git a/src/pykeen/models/unimodal/node_piece.py b/src/pykeen/models/unimodal/node_piece.py index 90180ec971..7c58b5bc6d 100644 --- a/src/pykeen/models/unimodal/node_piece.py +++ b/src/pykeen/models/unimodal/node_piece.py @@ -66,7 +66,7 @@ def __init__( Initialize the model. :param triples_factory: - the triples factory. Must have create_inverse_triples set to True. + the triples factory. Must have use_inverse_relations set to True. :param num_tokens: the number of relations to use to represent each entity, cf. :class:`pykeen.nn.NodePieceRepresentation`. @@ -114,7 +114,7 @@ def __init__( :raises ValueError: if the triples factory does not create inverse triples """ - if not triples_factory.create_inverse_triples: + if not triples_factory.use_inverse_relations: raise ValueError( "The provided triples factory does not create inverse triples. However, for the node piece " "representations inverse relation representations are required.", diff --git a/src/pykeen/nn/message_passing.py b/src/pykeen/nn/message_passing.py index 014a3f4b0c..ece5cbed33 100644 --- a/src/pykeen/nn/message_passing.py +++ b/src/pykeen/nn/message_passing.py @@ -584,7 +584,7 @@ def __init__( raise ValueError( f"max_id={max_id} differs from triples_factory.num_entities={triples_factory.num_entities}" ) - if triples_factory.create_inverse_triples: + if triples_factory.use_inverse_relations: raise ValueError( "RGCN internally creates inverse triples. It thus expects a triples factory without them.", ) diff --git a/src/pykeen/nn/node_piece/representation.py b/src/pykeen/nn/node_piece/representation.py index 0300f537d7..10273ee736 100644 --- a/src/pykeen/nn/node_piece/representation.py +++ b/src/pykeen/nn/node_piece/representation.py @@ -266,7 +266,7 @@ def __init__( # normalize triples mapped_triples = triples_factory.mapped_triples - if triples_factory.create_inverse_triples: + if triples_factory.use_inverse_relations: # inverse triples are created afterwards implicitly mapped_triples = mapped_triples[mapped_triples[:, 1] < triples_factory.real_num_relations] diff --git a/src/pykeen/nn/pyg.py b/src/pykeen/nn/pyg.py index 7604c0bcb1..e840fa308e 100644 --- a/src/pykeen/nn/pyg.py +++ b/src/pykeen/nn/pyg.py @@ -26,7 +26,7 @@ from pykeen.nn.init import LabelBasedInitializer from pykeen.pipeline import pipeline - dataset = get_dataset(dataset="nations", dataset_kwargs=dict(create_inverse_triples=True)) + dataset = get_dataset(dataset="nations", dataset_kwargs=dict(use_inverse_relations=True)) entity_initializer = LabelBasedInitializer.from_triples_factory( triples_factory=dataset.training, for_entities=True, diff --git a/src/pykeen/nn/representation.py b/src/pykeen/nn/representation.py index 49607f8278..45e4df65f6 100644 --- a/src/pykeen/nn/representation.py +++ b/src/pykeen/nn/representation.py @@ -816,7 +816,7 @@ def __init__( """ super().__init__() # TODO: Check - assert triples_factory.create_inverse_triples + assert triples_factory.use_inverse_relations self.entity_representations = build_representation( max_id=triples_factory.num_entities, representation=entity_representations, diff --git a/src/pykeen/triples/deteriorate.py b/src/pykeen/triples/deteriorate.py index c818154170..38fe67e245 100644 --- a/src/pykeen/triples/deteriorate.py +++ b/src/pykeen/triples/deteriorate.py @@ -40,7 +40,7 @@ def deteriorate( :raises ValueError: If a float is given for n that isn't between 0 and 1 """ # TODO: take care that triples aren't removed that are the only ones with any given entity - if reference.create_inverse_triples: + if reference.use_inverse_relations: raise NotImplementedError if isinstance(n, float): diff --git a/src/pykeen/triples/generation.py b/src/pykeen/triples/generation.py index 4ef6c84d30..24569bcdb0 100644 --- a/src/pykeen/triples/generation.py +++ b/src/pykeen/triples/generation.py @@ -50,7 +50,7 @@ def generate_triples_factory( num_relations: int = 7, num_triples: int = 101, random_state: TorchRandomHint = None, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, ) -> CoreTriplesFactory: """Generate a triples factory with random triples.""" mapped_triples = generate_triples( @@ -61,5 +61,5 @@ def generate_triples_factory( ) return CoreTriplesFactory.create( mapped_triples=mapped_triples, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, ) diff --git a/src/pykeen/triples/leakage.py b/src/pykeen/triples/leakage.py index 73dee84c0c..1cfca92e65 100644 --- a/src/pykeen/triples/leakage.py +++ b/src/pykeen/triples/leakage.py @@ -369,7 +369,7 @@ def reindex(*triples_factories: CoreTriplesFactory) -> List[CoreTriplesFactory]: entity_translation=entity_id_translation, relation_translation=relation_id_translation, ), - create_inverse_triples=factory.create_inverse_triples, + use_inverse_relations=factory.use_inverse_relations, ) for factory in triples_factories ] diff --git a/src/pykeen/triples/remix.py b/src/pykeen/triples/remix.py index 219ebddbc3..a1ccd31e2a 100644 --- a/src/pykeen/triples/remix.py +++ b/src/pykeen/triples/remix.py @@ -30,10 +30,10 @@ def remix(*triples_factories: CoreTriplesFactory, **kwargs) -> List[CoreTriplesF :param kwargs: Keyword arguments to be passed to :func:`split` :returns: A sequence of triples factories of the same sizes but randomly re-assigned triples - :raises NotImplementedError: if any of the triples factories have ``create_inverse_triples`` + :raises NotImplementedError: if any of the triples factories have ``use_inverse_relations`` """ for tf in triples_factories: - if tf.create_inverse_triples: + if tf.use_inverse_relations: raise NotImplementedError("The remix algorithm is not implemented for datasets with inverse triples") all_triples = cat_triples(*triples_factories) diff --git a/src/pykeen/triples/triples_factory.py b/src/pykeen/triples/triples_factory.py index e9d9352a0c..376c7b7e49 100644 --- a/src/pykeen/triples/triples_factory.py +++ b/src/pykeen/triples/triples_factory.py @@ -337,7 +337,7 @@ class KGInfo(ExtraReprMixin): num_relations: int #: whether to create inverse triples - create_inverse_triples: bool + use_inverse_relations: bool #: the number of real relations, i.e., without artificial inverses real_num_relations: int @@ -346,7 +346,7 @@ def __init__( self, num_entities: int, num_relations: int, - create_inverse_triples: bool, + use_inverse_relations: bool, ) -> None: """ Initialize the information object. @@ -355,22 +355,22 @@ def __init__( the number of entities. :param num_relations: the number of relations, excluding artifical inverse relations. - :param create_inverse_triples: + :param use_inverse_relations: whether to create inverse triples """ self.num_entities = num_entities self.real_num_relations = num_relations - if create_inverse_triples: + if use_inverse_relations: num_relations *= 2 self.num_relations = num_relations - self.create_inverse_triples = create_inverse_triples + self.use_inverse_relations = use_inverse_relations def iter_extra_repr(self) -> Iterable[str]: """Iterate over extra_repr components.""" yield from super().iter_extra_repr() yield f"num_entities={self.num_entities}" yield f"num_relations={self.num_relations}" - yield f"create_inverse_triples={self.create_inverse_triples}" + yield f"use_inverse_relations={self.use_inverse_relations}" class CoreTriplesFactory(KGInfo): @@ -384,7 +384,7 @@ def __init__( mapped_triples: Union[MappedTriples, np.ndarray], num_entities: int, num_relations: int, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, metadata: Optional[Mapping[str, Any]] = None, ): """ @@ -396,7 +396,7 @@ def __init__( The number of entities. :param num_relations: The number of relations. - :param create_inverse_triples: + :param use_inverse_relations: Whether to create inverse triples. :param metadata: Arbitrary metadata to go with the graph @@ -409,7 +409,7 @@ def __init__( super().__init__( num_entities=num_entities, num_relations=num_relations, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, ) # ensure torch.Tensor mapped_triples = torch.as_tensor(mapped_triples) @@ -430,7 +430,7 @@ def create( mapped_triples: MappedTriples, num_entities: Optional[int] = None, num_relations: Optional[int] = None, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, metadata: Optional[Mapping[str, Any]] = None, ) -> "CoreTriplesFactory": """ @@ -442,7 +442,7 @@ def create( The number of entities. If not given, inferred from mapped_triples. :param num_relations: The number of relations. If not given, inferred from mapped_triples. - :param create_inverse_triples: + :param use_inverse_relations: Whether to create inverse triples. :param metadata: Additional metadata to store in the factory. @@ -458,7 +458,7 @@ def create( mapped_triples=mapped_triples, num_entities=num_entities, num_relations=num_relations, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, metadata=metadata, ) @@ -469,7 +469,7 @@ def __eq__(self, __o: object) -> bool: # noqa: D105 (self.num_entities == __o.num_entities) and (self.num_relations == __o.num_relations) and (self.num_triples == __o.num_triples) - and (self.create_inverse_triples == __o.create_inverse_triples) + and (self.use_inverse_relations == __o.use_inverse_relations) and bool((self.mapped_triples == __o.mapped_triples).all().item()) ) @@ -506,19 +506,19 @@ def with_labels( mapped_triples=self.mapped_triples, entity_to_id=entity_to_id, relation_to_id=relation_to_id, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, metadata=self.metadata, ) def get_inverse_relation_id(self, relation: int) -> int: """Get the inverse relation identifier for the given relation.""" - if not self.create_inverse_triples: + if not self.use_inverse_relations: raise ValueError("Can not get inverse triple, they have not been created.") return relation_inverter.get_inverse_id(relation_id=relation) def _add_inverse_triples_if_necessary(self, mapped_triples: MappedTriples) -> MappedTriples: """Add inverse triples if they shall be created.""" - if not self.create_inverse_triples: + if not self.use_inverse_relations: return mapped_triples logger.info("Creating inverse triples.") @@ -579,7 +579,7 @@ def clone_and_exchange_triples( mapped_triples: MappedTriples, extra_metadata: Optional[Dict[str, Any]] = None, keep_metadata: bool = True, - create_inverse_triples: Optional[bool] = None, + use_inverse_relations: Optional[bool] = None, ) -> "CoreTriplesFactory": """ Create a new triples factory sharing everything except the triples. @@ -594,19 +594,19 @@ def clone_and_exchange_triples( the dictionaries will be unioned with precedence taken on keys from ``extra_metadata``. :param keep_metadata: Pass the current factory's metadata to the new triples factory - :param create_inverse_triples: + :param use_inverse_relations: Change inverse triple creation flag. If None, use flag from this factory. :return: The new factory. """ - if create_inverse_triples is None: - create_inverse_triples = self.create_inverse_triples + if use_inverse_relations is None: + use_inverse_relations = self.use_inverse_relations return CoreTriplesFactory( mapped_triples=mapped_triples, num_entities=self.num_entities, num_relations=self.real_num_relations, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, metadata={ **(extra_metadata or {}), **(self.metadata if keep_metadata else {}), # type: ignore @@ -661,7 +661,7 @@ def split( self.clone_and_exchange_triples( mapped_triples=triples, # do not explicitly create inverse triples for testing; this is handled by the evaluation code - create_inverse_triples=None if i == 0 else False, + use_inverse_relations=None if i == 0 else False, ) for i, triples in enumerate( split( @@ -852,9 +852,9 @@ def to_path_binary( def _get_binary_state(self): return dict( num_entities=self.num_entities, - # note: num_relations will be doubled again when instantiating with create_inverse_triples=True + # note: num_relations will be doubled again when instantiating with use_inverse_relations=True num_relations=self.real_num_relations, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, metadata=self.metadata, ) @@ -870,7 +870,7 @@ def __init__( mapped_triples: MappedTriples, entity_to_id: EntityMapping, relation_to_id: RelationMapping, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, metadata: Optional[Mapping[str, Any]] = None, num_entities: Optional[int] = None, num_relations: Optional[int] = None, @@ -884,7 +884,7 @@ def __init__( The mapping from entities' labels to their indices. :param relation_to_id: The mapping from relations' labels to their indices. - :param create_inverse_triples: + :param use_inverse_relations: Whether to create inverse triples. :param metadata: Arbitrary metadata to go with the graph @@ -917,7 +917,7 @@ def __init__( mapped_triples=mapped_triples, num_entities=num_entities, num_relations=num_relations, - create_inverse_triples=create_inverse_triples, + use_inverse_relations = use_inverse_relations, metadata=metadata, ) @@ -926,7 +926,7 @@ def from_labeled_triples( cls, triples: LabeledTriples, *, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, entity_to_id: Optional[EntityMapping] = None, relation_to_id: Optional[RelationMapping] = None, compact_id: bool = True, @@ -938,7 +938,7 @@ def from_labeled_triples( :param triples: shape: (n, 3), dtype: str The label-based triples. - :param create_inverse_triples: + :param use_inverse_relations: Whether to create inverse triples. :param entity_to_id: The mapping from entity labels to ID. If None, create a new one from the triples. @@ -956,7 +956,7 @@ def from_labeled_triples( """ # Check if the triples are inverted already # We re-create them pure index based to ensure that _all_ inverse triples are present and that they are - # contained if and only if create_inverse_triples is True. + # contained if and only if use_inverse_relations is True. if filter_out_candidate_inverse_relations: unique_relations, inverse = np.unique(triples[:, 1], return_inverse=True) suspected_to_be_inverse_relations = {r for r in unique_relations if r.endswith(INVERSE_SUFFIX)} @@ -996,7 +996,7 @@ def from_labeled_triples( entity_to_id=entity_to_id, relation_to_id=relation_to_id, mapped_triples=mapped_triples, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, metadata=metadata, ) @@ -1005,7 +1005,7 @@ def from_path( cls, path: Union[str, pathlib.Path, TextIO], *, - create_inverse_triples: bool = False, + use_inverse_relations: bool = False, entity_to_id: Optional[EntityMapping] = None, relation_to_id: Optional[RelationMapping] = None, compact_id: bool = True, @@ -1018,7 +1018,7 @@ def from_path( :param path: The path where the label-based triples are stored. - :param create_inverse_triples: + :param use_inverse_relations: Whether to create inverse triples. :param entity_to_id: The mapping from entity labels to ID. If None, create a new one from the triples. @@ -1045,7 +1045,7 @@ def from_path( return cls.from_labeled_triples( triples=triples, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, entity_to_id=entity_to_id, relation_to_id=relation_to_id, compact_id=compact_id, @@ -1069,7 +1069,7 @@ def to_core_triples_factory(self) -> CoreTriplesFactory: mapped_triples=self.mapped_triples, num_entities=self.num_entities, num_relations=self.num_relations, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, metadata=self.metadata, ) @@ -1105,21 +1105,28 @@ def _from_path_binary(cls, path: pathlib.Path) -> MutableMapping[str, Any]: data[name] = dict(zip(df["label"], df["id"])) return data + # docstr-coverage: inherited + def _get_binary_state(self): # noqa: D102 + return dict( + use_inverse_relations=self.use_inverse_relations, + metadata=self.metadata, + ) + # docstr-coverage: inherited def clone_and_exchange_triples( self, mapped_triples: MappedTriples, extra_metadata: Optional[Dict[str, Any]] = None, keep_metadata: bool = True, - create_inverse_triples: Optional[bool] = None, + use_inverse_relations: Optional[bool] = None, ) -> "TriplesFactory": # noqa: D102 - if create_inverse_triples is None: - create_inverse_triples = self.create_inverse_triples + if use_inverse_relations is None: + use_inverse_relations = self.use_inverse_relations return TriplesFactory( entity_to_id=self.entity_to_id, relation_to_id=self.relation_to_id, mapped_triples=mapped_triples, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, metadata={ **(extra_metadata or {}), **(self.metadata if keep_metadata else {}), # type: ignore diff --git a/src/pykeen/triples/triples_numeric_literals_factory.py b/src/pykeen/triples/triples_numeric_literals_factory.py index 933483676b..081d7407a4 100644 --- a/src/pykeen/triples/triples_numeric_literals_factory.py +++ b/src/pykeen/triples/triples_numeric_literals_factory.py @@ -103,7 +103,7 @@ def from_labeled_triples( entity_to_id=base.entity_to_id, relation_to_id=base.relation_to_id, mapped_triples=base.mapped_triples, - create_inverse_triples=base.create_inverse_triples, + use_inverse_relations=base.use_inverse_relations, numeric_literals=numeric_literals, literals_to_id=literals_to_id, ) @@ -128,15 +128,15 @@ def clone_and_exchange_triples( mapped_triples: MappedTriples, extra_metadata: Optional[Dict[str, Any]] = None, keep_metadata: bool = True, - create_inverse_triples: Optional[bool] = None, + use_inverse_relations: Optional[bool] = None, ) -> "TriplesNumericLiteralsFactory": # noqa: D102 - if create_inverse_triples is None: - create_inverse_triples = self.create_inverse_triples + if use_inverse_relations is None: + use_inverse_relations = self.use_inverse_relations return TriplesNumericLiteralsFactory( mapped_triples=mapped_triples, entity_to_id=self.entity_to_id, relation_to_id=self.relation_to_id, - create_inverse_triples=create_inverse_triples, + use_inverse_relations=use_inverse_relations, metadata={ **(extra_metadata or {}), **(self.metadata if keep_metadata else {}), # type: ignore diff --git a/tests/cases.py b/tests/cases.py index 314145a945..adeef75724 100644 --- a/tests/cases.py +++ b/tests/cases.py @@ -931,7 +931,7 @@ class ModelTestCase(unittest_templates.GenericTestCase[Model]): embedding_dim: int = 3 #: Whether to create inverse triples (needed e.g. by ConvE) - create_inverse_triples: bool = False + use_inverse_relations: bool = False #: The sampler to use for sLCWA (different e.g. for R-GCN) sampler: Optional[str] = None @@ -965,7 +965,7 @@ def pre_setup_hook(self) -> None: # noqa: D102 def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: # noqa: D102 kwargs = super()._pre_instantiation_hook(kwargs=kwargs) - dataset = Nations(create_inverse_triples=self.create_inverse_triples) + dataset = Nations(use_inverse_relations=self.use_inverse_relations) self.factory = dataset.training # insert shared parameters kwargs["triples_factory"] = self.factory @@ -1038,7 +1038,7 @@ def _test_score( self.skipTest(str(e)) else: raise e - if score is self.instance.score_r and self.create_inverse_triples: + if score is self.instance.score_r and self.use_inverse_relations: # TODO: look into score_r for inverse relations logger.warning("score_r's shape is not clear yet for models with inverse relations") else: @@ -1201,8 +1201,8 @@ def _cli_extras(self): ] extras.extend(self.cli_extras) - # Make sure that inverse triples are created if create_inverse_triples=True - if self.create_inverse_triples: + # Make sure that inverse triples are created if use_inverse_relations=True + if self.use_inverse_relations: extras.append("--create-inverse-triples") extras = [str(e) for e in extras] @@ -1223,7 +1223,7 @@ def test_pipeline_nations_early_stopper(self): model=self.cls, model_kwargs=model_kwargs, dataset="nations", - dataset_kwargs=dict(create_inverse_triples=self.create_inverse_triples), + dataset_kwargs=dict(use_inverse_relations=self.use_inverse_relations), stopper="early", training_loop_kwargs=self.training_loop_kwargs, stopper_kwargs=dict(frequency=1), @@ -1421,7 +1421,7 @@ class BaseNodePieceTest(ModelTestCase): """Test the NodePiece model.""" cls = pykeen.models.NodePiece - create_inverse_triples = True + use_inverse_relations = True def _help_test_cli(self, args): # noqa: D102 if self.instance_kwargs.get("tokenizers_kwargs"): @@ -1448,12 +1448,12 @@ def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMa num_triples_training=self.num_triples_training, num_triples_inference=self.num_triples_inference, num_triples_testing=self.num_triples_testing, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, ) training_loop_kwargs = dict(self.training_loop_kwargs or dict()) training_loop_kwargs["mode"] = self.mode InductiveModelTestCase.training_loop_kwargs = training_loop_kwargs - # dataset = InductiveFB15k237(create_inverse_triples=self.create_inverse_triples) + # dataset = InductiveFB15k237(use_inverse_relations=self.use_inverse_relations) kwargs["triples_factory"] = self.factory = dataset.transductive_training kwargs["inference_factory"] = dataset.inductive_inference return kwargs @@ -1543,7 +1543,7 @@ class TriplesFactoryRepresentationTestCase(RepresentationTestCase): num_entities: ClassVar[int] num_relations: ClassVar[int] = 7 num_triples: ClassVar[int] = 31 - create_inverse_triples: bool = False + use_inverse_relations: bool = False def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: # noqa: D102 self.num_entities = self.max_id @@ -1552,7 +1552,7 @@ def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMa num_entities=self.max_id, num_relations=self.num_relations, num_triples=self.num_triples, - create_inverse_triples=self.create_inverse_triples, + use_inverse_relations=self.use_inverse_relations, ) return kwargs @@ -2063,7 +2063,7 @@ def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMa num_entities=self.max_id, num_relations=self.num_relations, num_triples=self.num_triples, - create_inverse_triples=False, + use_inverse_relations=False, ) # inferred from triples factory kwargs.pop("max_id") diff --git a/tests/test_datasets/test_loading.py b/tests/test_datasets/test_loading.py index fc9773b107..f7e77020cf 100644 --- a/tests/test_datasets/test_loading.py +++ b/tests/test_datasets/test_loading.py @@ -162,12 +162,12 @@ class TestPathDatasetTriples(cases.LocalDatasetTestCase): exp_num_triples = 1992 dataset_cls = Nations - def test_create_inverse_triples(self): + def test_use_inverse_relations(self): """Verify that inverse triples are only created in the training factory.""" - dataset = Nations(create_inverse_triples=True) - assert dataset.training.create_inverse_triples - assert not dataset.testing.create_inverse_triples - assert not dataset.validation.create_inverse_triples + dataset = Nations(use_inverse_relations=True) + assert dataset.training.use_inverse_relations + assert not dataset.testing.use_inverse_relations + assert not dataset.validation.use_inverse_relations class TestPathDataset(cases.LocalDatasetTestCase): diff --git a/tests/test_lightning.py b/tests/test_lightning.py index 67014774aa..63db544d06 100644 --- a/tests/test_lightning.py +++ b/tests/test_lightning.py @@ -70,8 +70,8 @@ def test_lit_training(model, model_kwargs, training_loop): """Test training models with PyTorch Lightning.""" # some models require inverse relations - create_inverse_triples = model is not models.RGCN - dataset = get_dataset(dataset="nations", dataset_kwargs=dict(create_inverse_triples=create_inverse_triples)) + use_inverse_relations = model is not models.RGCN + dataset = get_dataset(dataset="nations", dataset_kwargs=dict(use_inverse_relations=use_inverse_relations)) # some model require access to the training triples if "triples_factory" in model_kwargs: diff --git a/tests/test_model_mode.py b/tests/test_model_mode.py index 0928296d3a..a348cc3600 100644 --- a/tests/test_model_mode.py +++ b/tests/test_model_mode.py @@ -128,4 +128,4 @@ class MinimalTriplesFactory: num_entities = 2 num_relations = 2 - create_inverse_triples: bool = False + use_inverse_relations: bool = False diff --git a/tests/test_models.py b/tests/test_models.py index 39e9bea363..ba85431516 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -48,7 +48,7 @@ class TestCompGCN(cases.ModelTestCase): """Test the CompGCN model.""" cls = pykeen.models.CompGCN - create_inverse_triples = True + use_inverse_relations = True num_constant_init = 3 # BN(2) + Bias cli_extras = ["--create-inverse-triples"] @@ -77,7 +77,7 @@ class TestConvE(cases.ModelTestCase): cls = pykeen.models.ConvE embedding_dim = 12 - create_inverse_triples = True + use_inverse_relations = True kwargs = { "output_channels": 2, "embedding_height": 3, @@ -190,7 +190,7 @@ def test_disconnected(self): [[0, 0, 1], [1, 1, 0], [3, 1, 0], [3, 2, 1]], dtype=torch.long ) # node ID 2 is missing as a disconnected node factory = CoreTriplesFactory.create( - mapped_triples=edges, num_entities=4, num_relations=3, create_inverse_triples=True + mapped_triples=edges, num_entities=4, num_relations=3, use_inverse_relations=True ) pykeen.models.NodePiece(triples_factory=factory, num_tokens=2) @@ -270,7 +270,7 @@ class TestInductiveNodePiece(cases.InductiveModelTestCase): """Test the InductiveNodePiece model.""" cls = pykeen.models.InductiveNodePiece - create_inverse_triples = True + use_inverse_relations = True class TestInductiveNodePieceGNN(cases.InductiveModelTestCase): @@ -278,7 +278,7 @@ class TestInductiveNodePieceGNN(cases.InductiveModelTestCase): cls = pykeen.models.InductiveNodePieceGNN num_constant_init = 6 - create_inverse_triples = True + use_inverse_relations = True train_batch_size = 8 @@ -755,6 +755,72 @@ def test_has_hpo_defaults(self): # noqa: D102 raise unittest.SkipTest(f"Base class {self.cls} does not provide HPO defaults.") + +class InverseRelationPredictionTests(unittest_templates.GenericTestCase[pykeen.models.FixedModel]): + """Test for prediction with inverse relations.""" + + cls = pykeen.models.FixedModel + + def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + # create triples factory with inverse relations + kwargs = super()._pre_instantiation_hook(kwargs=kwargs) + kwargs["triples_factory"] = self.factory = Nations(use_inverse_relations=True).training + return kwargs + + def _combination_batch( + self, + heads: bool = True, + relations: bool = True, + tails: bool = True, + ) -> torch.LongTensor: + """Generate a batch with all combinations.""" + factors = [] + if heads: + factors.append(range(self.factory.num_entities)) + if relations: + factors.append(range(self.factory.real_num_relations)) + if tails: + factors.append(range(self.factory.num_entities)) + return torch.as_tensor( + data=list(itertools.product(*factors)), + dtype=torch.long, + ) + + def test_predict_hrt(self): + """Test predict_hrt.""" + hrt_batch = self._combination_batch() + expected_scores = self.instance._generate_fake_scores( + h=hrt_batch[:, 0], + r=2 * hrt_batch[:, 1], + t=hrt_batch[:, 2], + ).unsqueeze(dim=-1) + scores = self.instance.predict_hrt(hrt_batch=hrt_batch) + assert torch.allclose(scores, expected_scores) + + def test_predict_h(self): + """Test predict_h.""" + rt_batch = self._combination_batch(heads=False) + # head prediction via inverse tail prediction + expected_scores = self.instance._generate_fake_scores( + h=rt_batch[:, 1, None], + r=2 * rt_batch[:, 0, None] + 1, + t=torch.arange(self.factory.num_entities).unsqueeze(dim=0), + ) + scores = self.instance.predict_h(rt_batch=rt_batch) + assert torch.allclose(scores, expected_scores) + + def test_predict_t(self): + """Test predict_t.""" + hr_batch = self._combination_batch(tails=False) + expected_scores = self.instance._generate_fake_scores( + h=hr_batch[:, 0, None], + r=2 * hr_batch[:, 1, None], + t=torch.arange(self.factory.num_entities).unsqueeze(dim=0), + ) + scores = self.instance.predict_t(hr_batch=hr_batch) + assert torch.allclose(scores, expected_scores) + + class CooccurrenceFilteredModelTests(cases.ModelTestCase): """Tests for the filtered meta model.""" diff --git a/tests/test_nn/test_representation.py b/tests/test_nn/test_representation.py index 1bd45a299c..142f5f36f1 100644 --- a/tests/test_nn/test_representation.py +++ b/tests/test_nn/test_representation.py @@ -76,7 +76,7 @@ class TestSingleCompGCNRepresentationTests(cases.TriplesFactoryRepresentationTes cls = pykeen.nn.representation.SingleCompGCNRepresentation dim: ClassVar[int] = 3 - create_inverse_triples = True + use_inverse_relations = True def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: # noqa: D102 kwargs = super()._pre_instantiation_hook(kwargs=kwargs) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index a18f759f66..76ea55ba4a 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -115,7 +115,7 @@ def test_consume_scores(num_entities: int, num_relations: int): """Test for consume_scores.""" dataset = pykeen.predict.AllPredictionDataset(num_entities=num_entities, num_relations=num_relations) model = pykeen.models.mocks.FixedModel( - triples_factory=KGInfo(num_entities=num_entities, num_relations=num_relations, create_inverse_triples=False) + triples_factory=KGInfo(num_entities=num_entities, num_relations=num_relations, use_inverse_relations=False) ) consumer = pykeen.predict.CountScoreConsumer() pykeen.predict.consume_scores(model, dataset, consumer) @@ -128,7 +128,7 @@ def _iter_predict_all_inputs() -> Iterable[Tuple[pykeen.models.Model, Optional[i # use a small model, since operation is expensive num_entities, num_relations = 3, 2 model = pykeen.models.mocks.FixedModel( - triples_factory=KGInfo(num_entities=num_entities, num_relations=num_relations, create_inverse_triples=False) + triples_factory=KGInfo(num_entities=num_entities, num_relations=num_relations, use_inverse_relations=False) ) # all scores, automatic batch size yield model, None, pykeen.typing.LABEL_TAIL, None diff --git a/tests/test_triples_factory.py b/tests/test_triples_factory.py index 33a909063b..6251c455db 100644 --- a/tests/test_triples_factory.py +++ b/tests/test_triples_factory.py @@ -84,7 +84,7 @@ def test_correct_inverse_creation(self): ["e1", "a", "e2"], ] t = np.array(t, dtype=str) - factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True) + factory = TriplesFactory.from_labeled_triples(triples=t, use_inverse_relations=True) instances = factory.create_slcwa_instances() assert len(instances) == 4 @@ -92,22 +92,22 @@ def test_automatic_incomplete_inverse_detection(self): """Test detecting that the triples contain inverses, warns about them, and filters them out.""" # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains # inverse relations, although the triples contained in it are not the same we would have when removing the - # first triple, and passing create_inverse_triples=True. + # first triple, and passing use_inverse_relations=True. t = [ ["e3", f"a.{INVERSE_SUFFIX}", "e10"], ["e1", "a", "e2"], ["e1", "a.", "e5"], ] t = np.array(t, dtype=str) - for create_inverse_triples in (False, True): + for use_inverse_relations in (False, True): with patch("pykeen.triples.triples_factory.logger.warning") as warning: - factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=create_inverse_triples) + factory = TriplesFactory.from_labeled_triples(triples=t, use_inverse_relations=use_inverse_relations) # check for warning warning.assert_called() # check for filtered triples assert factory.num_triples == 2 # check for correct inverse triples flag - assert factory.create_inverse_triples == create_inverse_triples + assert factory.use_inverse_relations == use_inverse_relations def test_id_to_label(self): """Test ID-to-label conversion.""" @@ -158,7 +158,7 @@ def _test_restriction( assert no_restriction_to_apply == equal_factory_object # check that inverse_triples is correctly carried over - assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples + assert original_triples_factory.use_inverse_relations == restricted_triples_factory.use_inverse_relations # verify that the label-to-ID mapping has not been changed assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id @@ -198,7 +198,7 @@ def test_new_with_restriction(self): } for inverse_triples in (True, False): original_triples_factory = Nations( - create_inverse_triples=inverse_triples, + use_inverse_relations=inverse_triples, ).training # Test different combinations of restrictions for ( @@ -252,13 +252,13 @@ def test_create_lcwa_instances(self): def test_split_inverse_triples(self): """Test whether inverse triples are only created in the training factory.""" # set create inverse triple to true - self.factory.create_inverse_triples = True + self.factory.use_inverse_relations = True # split factory train, *others = self.factory.split() # check that in *training* inverse triple are to be created - assert train.create_inverse_triples + assert train.use_inverse_relations # check that in all other splits no inverse triples are to be created - assert not any(f.create_inverse_triples for f in others) + assert not any(f.use_inverse_relations for f in others) @needs_packages("wordcloud", "IPython") def test_entity_word_cloud(self): @@ -382,7 +382,7 @@ def test_triples(self): def test_inverse_triples(self): """Test that the right number of entities and triples exist after inverting them.""" - triples_factory = TriplesFactory.from_labeled_triples(triples=triples, create_inverse_triples=True) + triples_factory = TriplesFactory.from_labeled_triples(triples=triples, use_inverse_relations=True) self.assertEqual(4, triples_factory.num_relations) self.assertEqual( set(range(triples_factory.num_entities)), @@ -503,7 +503,7 @@ def test_core_binary(self): def test_core_binary_inverse_relations(self): """Test binary i/o on core triples factory with inverse relations.""" - tf1 = Nations(create_inverse_triples=True).training.to_core_triples_factory() + tf1 = Nations(use_inverse_relations=True).training.to_core_triples_factory() self.assert_binary_io(tf1, CoreTriplesFactory) def assert_binary_io(self, tf, tf_cls): @@ -528,7 +528,7 @@ def assert_tf_equal(self, tf1, tf2) -> None: self.assertEqual(tf1.metadata, tf2.metadata) self.assertEqual(tf1.num_entities, tf2.num_entities) self.assertEqual(tf1.num_relations, tf2.num_relations) - self.assertEqual(tf1.create_inverse_triples, tf2.create_inverse_triples) + self.assertEqual(tf1.use_inverse_relations, tf2.use_inverse_relations) self.assertEqual( tf1.mapped_triples.detach().cpu().numpy().tolist(), tf2.mapped_triples.detach().cpu().numpy().tolist(),