Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Python type hints #708

Merged
merged 28 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f5f32dd
Run "monkeytype apply --pep_563" on all modules
juhoinkinen May 23, 2023
c8a04cb
Use dict instead of OrderedDict
juhoinkinen May 23, 2023
7ed97a5
Make imports that were too eagerly made conditional default again
juhoinkinen May 23, 2023
1a36c09
Fix flake8 errors
juhoinkinen May 23, 2023
1660e56
Turn forward references non-strings
juhoinkinen May 23, 2023
61decb7
Use less specific types
juhoinkinen May 23, 2023
ba77e84
Remove "Union[Any," in hints
juhoinkinen May 24, 2023
2f9ee10
Move comment back to its original place
juhoinkinen May 24, 2023
d2c5e53
Fix some omissions and errors by monkeytype
juhoinkinen May 24, 2023
f242a98
Simplify hints using float for Union[int, float]
juhoinkinen May 24, 2023
7c3c5dc
Simplify hints using Sequence for Union[Tuple, List]
juhoinkinen May 24, 2023
961dd09
Remove too wide usage of Any (e.g. in Unions, Lists, Iterators)
juhoinkinen May 25, 2023
33cdcf2
Unify type of params to Dict[str, Any] or DefaultDict[str, Dict]
juhoinkinen May 25, 2023
bb9951f
Simplify overly complex types
juhoinkinen May 25, 2023
c405d83
Fix erroneously passing whole Error obj to ClickException instead of j…
juhoinkinen May 25, 2023
b74d869
Annotate (manually) annif/backend/hyperopt.py
juhoinkinen May 25, 2023
4eb904e
Manually annotate annif/backend/mixins.py
juhoinkinen May 26, 2023
6987c05
Manually annotate annif/corpus/document.py
juhoinkinen May 26, 2023
76022aa
Upgrade to PEP 585 and PEP 604 typing features/syntax
juhoinkinen May 29, 2023
5f84a56
Manually annotate annif/corpus/parallel.py
juhoinkinen May 30, 2023
b9cfacf
Manually annotate annif/util.py
juhoinkinen May 30, 2023
c49fee9
Fix easily fixable errors noted by Mypy
juhoinkinen May 30, 2023
8092605
Exclude TYPE_CHECKING blocks from test coverage
juhoinkinen Jun 1, 2023
7ec0b73
Narrow down TokenSet tokens type to np.ndarray only
juhoinkinen Jun 1, 2023
fd19c67
Use int instead of int | np.int32
juhoinkinen Jun 1, 2023
486b48f
Move imports used for typechecking only to TYPE_CHECKING blocks
juhoinkinen Jun 1, 2023
6e92123
Restore accidentally removed annif.suggestion import
juhoinkinen Jun 2, 2023
252c75f
Make type optional as it should be
juhoinkinen Jun 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Unify type of params to Dict[str, Any] or DefaultDict[str, Dict]
  • Loading branch information
juhoinkinen committed May 25, 2023
commit 33cdcf2a53c1d7bd0d74da2c59ca53369bc4aff1
16 changes: 8 additions & 8 deletions annif/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os.path
from datetime import datetime, timezone
from glob import glob
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from annif import logger
from annif.suggestion import SuggestionBatch
Expand All @@ -24,7 +24,7 @@ class AnnifBackend(metaclass=abc.ABCMeta):
DEFAULT_PARAMETERS = {"limit": 100}

def __init__(
self, backend_id: str, config_params: Any, project: AnnifProject
self, backend_id: str, config_params: Dict[str, Any], project: AnnifProject
) -> None:
"""Initialize backend with specific parameters. The
parameters are a dict. Keys and values depend on the specific
Expand All @@ -34,7 +34,7 @@ def __init__(
self.project = project
self.datadir = project.datadir

def default_params(self) -> Dict[str, Union[str, bool, int]]:
def default_params(self) -> Dict[str, Any]:
return self.DEFAULT_PARAMETERS

@property
Expand All @@ -61,7 +61,7 @@ def modification_time(self) -> Optional[datetime.datetime]:

def _get_backend_params(
self,
params: Optional[Union[Dict[str, str], Dict[str, int], Dict[str, float]]],
params: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
backend_params = dict(self.params)
if params is not None:
Expand All @@ -71,7 +71,7 @@ def _get_backend_params(
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
"""This method can be overridden by backends. It implements
Expand All @@ -81,7 +81,7 @@ def _train(
def train(
self,
corpus: DocumentCorpus,
params: Optional[Union[Dict[str, float], Dict[str, int]]] = None,
params: Optional[Dict[str, Any]] = None,
jobs: int = 0,
) -> None:
"""Train the model on the given document or subject corpus."""
Expand Down Expand Up @@ -116,7 +116,7 @@ def _suggest_batch(
def suggest(
self,
texts: List[str],
params: Optional[Union[Dict[str, str], Dict[str, int]]] = None,
params: Optional[Dict[str, Any]] = None,
) -> SuggestionBatch:
"""Suggest subjects for the input documents and return a list of subject sets
represented as a list of SubjectSuggestion objects."""
Expand Down Expand Up @@ -149,7 +149,7 @@ def _learn(self, corpus, params):
def learn(
self,
corpus: DocumentCorpus,
params: Optional[Dict[str, int]] = None,
params: Optional[Dict[str, Any]] = None,
) -> None:
"""Further train the model on the given document or subject corpus."""
beparams = self._get_backend_params(params)
Expand Down
8 changes: 3 additions & 5 deletions annif/backend/dummy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Dummy backend for testing basic interaction of projects and backends"""
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, List, Union
from typing import TYPE_CHECKING, Any, Dict, List

from annif.suggestion import SubjectSuggestion

Expand All @@ -24,9 +24,7 @@ def default_params(self) -> Dict[str, int]:
def initialize(self, parallel: bool = False) -> None:
self.initialized = True

def _suggest(
self, text: str, params: Dict[str, Union[int, str]]
) -> List[SubjectSuggestion]:
def _suggest(self, text: str, params: Dict[str, Any]) -> List[SubjectSuggestion]:
score = float(params.get("score", 1.0))

# Ensure tests fail if "text" with wrong type ends up here
Expand All @@ -47,7 +45,7 @@ def _suggest(
def _learn(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
) -> None:
# in this dummy backend we "learn" by picking up the subject ID
# of the first subject of the first document in the learning set
Expand Down
10 changes: 4 additions & 6 deletions annif/backend/ensemble.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Ensemble backend that combines results from multiple projects"""
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import annif.eval
import annif.parallel
Expand Down Expand Up @@ -49,7 +49,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
) -> SuggestionBatch:
"""Merge the given SuggestionBatches from each source into a single
SuggestionBatch. The default implementation computes a weighted
Expand All @@ -63,7 +63,7 @@ def _merge_source_batches(
)

def _suggest_batch(
self, texts: List[str], params: Dict[str, Union[float, str]]
self, texts: List[str], params: Dict[str, Any]
) -> SuggestionBatch:
sources = annif.util.parse_sources(params["sources"])
batch_by_source = self._suggest_with_sources(texts, sources)
Expand Down Expand Up @@ -159,7 +159,5 @@ def get_hp_optimizer(
) -> EnsembleOptimizer:
return EnsembleOptimizer(self, corpus, metric)

def _train(
self, corpus: DocumentCorpus, params: Dict[str, Union[int, str]], jobs: int = 0
):
def _train(self, corpus: DocumentCorpus, params: Dict[str, Any], jobs: int = 0):
raise NotSupportedException("Training ensemble backend is not possible.")
10 changes: 5 additions & 5 deletions annif/backend/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import collections
import os.path
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

import fasttext

Expand Down Expand Up @@ -56,7 +56,7 @@ class FastTextBackend(mixins.ChunkingBackend, backend.AnnifBackend):
# defaults for uninitialized instances
_model = None

def default_params(self) -> Dict[str, Union[float, str]]:
def default_params(self) -> Dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(mixins.ChunkingBackend.DEFAULT_PARAMETERS)
params.update(self.DEFAULT_PARAMETERS)
Expand Down Expand Up @@ -119,7 +119,7 @@ def _create_train_file(
corpus, self.datadir, self.TRAIN_FILE, method=self._write_train_file
)

def _create_model(self, params: Dict[str, Union[float, str]], jobs: int) -> None:
def _create_model(self, params: Dict[str, Any], jobs: int) -> None:
self.info("creating fastText model")
trainpath = os.path.join(self.datadir, self.TRAIN_FILE)
modelpath = os.path.join(self.datadir, self.MODEL_FILE)
Expand All @@ -137,7 +137,7 @@ def _create_model(self, params: Dict[str, Union[float, str]], jobs: int) -> None
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
if corpus != "cached":
Expand All @@ -163,7 +163,7 @@ def _predict_chunks(
)

def _suggest_chunks(
self, chunktexts: List[str], params: Dict[str, Union[float, str]]
self, chunktexts: List[str], params: Dict[str, Any]
) -> List[SubjectSuggestion]:
limit = int(params["limit"])
chunklabels, chunkscores = self._predict_chunks(chunktexts, limit)
Expand Down
6 changes: 2 additions & 4 deletions annif/backend/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

import importlib
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import dateutil.parser
import requests
Expand Down Expand Up @@ -63,9 +63,7 @@ def _get_project_info(self, key: str) -> Optional[Union[bool, str]]:
else:
return None

def _suggest(
self, text: str, params: Dict[str, Union[int, str]]
) -> List[SubjectSuggestion]:
def _suggest(self, text: str, params: Dict[str, Any]) -> List[SubjectSuggestion]:
data = {"text": text}
if "project" in params:
data["project"] = params["project"]
Expand Down
12 changes: 5 additions & 7 deletions annif/backend/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

import os.path
from typing import TYPE_CHECKING, Dict, Iterator, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple

import joblib
import numpy as np
Expand Down Expand Up @@ -93,7 +93,7 @@ class MLLMBackend(hyperopt.AnnifHyperoptBackend):
def get_hp_optimizer(self, corpus: DocumentCorpus, metric: str) -> MLLMOptimizer:
Fixed Show fixed Hide fixed
return MLLMOptimizer(self, corpus, metric)

def default_params(self) -> Dict[str, Union[float, bool]]:
def default_params(self) -> Dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -124,7 +124,7 @@ def initialize(self, parallel: bool = False) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
self.info("starting train")
Expand Down Expand Up @@ -158,16 +158,14 @@ def _generate_candidates(self, text: str) -> List[Candidate]:
def _prediction_to_result(
self,
prediction: List[Tuple[np.float64, int]],
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
) -> Iterator:
vector = np.zeros(len(self.project.subjects), dtype=np.float32)
for score, subject_id in prediction:
vector[subject_id] = score
return vector_to_suggestions(vector, int(params["limit"]))

def _suggest(
self, text: str, params: Dict[str, Union[float, bool, str]]
) -> Iterator:
def _suggest(self, text: str, params: Dict[str, Any]) -> Iterator:
candidates = self._generate_candidates(text)
prediction = self._model.predict(candidates)
return self._prediction_to_result(prediction, params)
10 changes: 5 additions & 5 deletions annif/backend/nn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os.path
import shutil
from io import BytesIO
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

import joblib
import lmdb
Expand Down Expand Up @@ -112,7 +112,7 @@ class NNEnsembleBackend(backend.AnnifLearningBackend, ensemble.BaseEnsembleBacke
# defaults for uninitialized instances
_model = None

def default_params(self) -> Dict[str, Union[float, str]]:
def default_params(self) -> Dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -140,7 +140,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
) -> SuggestionBatch:
src_weight = dict(sources)
score_vectors = np.array(
Expand Down Expand Up @@ -199,7 +199,7 @@ def _create_model(self, sources: List[Tuple[str, float]]) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
sources = annif.util.parse_sources(self.params["sources"])
Expand Down Expand Up @@ -286,7 +286,7 @@ def _fit_model(
def _learn(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
) -> None:
self.initialize()
self._fit_model(
Expand Down
10 changes: 5 additions & 5 deletions annif/backend/omikuji.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import os.path
import shutil
from typing import TYPE_CHECKING, Dict, List, Union
from typing import TYPE_CHECKING, Any, Dict, List

import omikuji

Expand Down Expand Up @@ -43,7 +43,7 @@ class OmikujiBackend(mixins.TfidfVectorizerMixin, backend.AnnifBackend):
"collapse_every_n_layers": 0,
}

def default_params(self) -> Dict[str, Union[int, bool]]:
def default_params(self) -> Dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -96,7 +96,7 @@ def _create_train_file(self, veccorpus: csr_matrix, corpus: DocumentCorpus) -> N
trainfile.seek(0)
print("{:08d}".format(n_samples), end="", file=trainfile)

def _create_model(self, params: Dict[str, Union[int, bool]], jobs: int) -> None:
def _create_model(self, params: Dict[str, Any], jobs: int) -> None:
train_path = os.path.join(self.datadir, self.TRAIN_FILE)
model_path = os.path.join(self.datadir, self.MODEL_FILE)
hyper_param = omikuji.Model.default_hyper_param()
Expand All @@ -114,7 +114,7 @@ def _create_model(self, params: Dict[str, Union[int, bool]], jobs: int) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, bool]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
if corpus != "cached":
Expand All @@ -135,7 +135,7 @@ def _train(
self._create_model(params, jobs)

def _suggest_batch(
self, texts: List[str], params: Dict[str, Union[int, bool]]
self, texts: List[str], params: Dict[str, Any]
) -> SuggestionBatch:
vector = self.vectorizer.transform(texts)
limit = int(params["limit"])
Expand Down
8 changes: 4 additions & 4 deletions annif/backend/pav.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import annotations

import os.path
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

import joblib
import numpy as np
Expand Down Expand Up @@ -36,7 +36,7 @@ class PAVBackend(ensemble.BaseEnsembleBackend):

DEFAULT_PARAMETERS = {"min-docs": 10}

def default_params(self) -> Dict[str, int]:
def default_params(self) -> Dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -67,7 +67,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
) -> SuggestionBatch:
reg_batch_by_source = {}
for project_id, batch in batch_by_source.items():
Expand Down Expand Up @@ -156,7 +156,7 @@ def _create_pav_model(
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
if corpus == "cached":
Expand Down
Loading