Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Python type hints #708

Merged
merged 28 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f5f32dd
Run "monkeytype apply --pep_563" on all modules
juhoinkinen May 23, 2023
c8a04cb
Use dict instead of OrderedDict
juhoinkinen May 23, 2023
7ed97a5
Make imports that were too eagerly made conditional default again
juhoinkinen May 23, 2023
1a36c09
Fix flake8 errors
juhoinkinen May 23, 2023
1660e56
Turn forward references non-strings
juhoinkinen May 23, 2023
61decb7
Use less specific types
juhoinkinen May 23, 2023
ba77e84
Remove "Union[Any," in hints
juhoinkinen May 24, 2023
2f9ee10
Move comment back to its original place
juhoinkinen May 24, 2023
d2c5e53
Fix some omissions and errors by monkeytype
juhoinkinen May 24, 2023
f242a98
Simplify hints using float for Union[int, float]
juhoinkinen May 24, 2023
7c3c5dc
Simplify hints using Sequence for Union[Tuple, List]
juhoinkinen May 24, 2023
961dd09
Remove too wide usage of Any (e.g. in Unions, Lists, Iterators)
juhoinkinen May 25, 2023
33cdcf2
Unify type of params to Dict[str, Any] or DefaultDict[str, Dict
juhoinkinen May 25, 2023
bb9951f
Simplify overly complex types
juhoinkinen May 25, 2023
c405d83
Fix erroneously passing whole Error obj to ClickException instead of j…
juhoinkinen May 25, 2023
b74d869
Annotate (manually) annif/backend/hyperopt.py
juhoinkinen May 25, 2023
4eb904e
Manually annotate annif/backend/mixins.py
juhoinkinen May 26, 2023
6987c05
Manually annotate annif/corpus/document.py
juhoinkinen May 26, 2023
76022aa
Upgrade to PEP 585 and PEP 604 typing features/syntax
juhoinkinen May 29, 2023
5f84a56
Manually annotate annif/corpus/parallel.py
juhoinkinen May 30, 2023
b9cfacf
Manually annotate annif/util.py
juhoinkinen May 30, 2023
c49fee9
Fix easily fixable errors noted by Mypy
juhoinkinen May 30, 2023
8092605
Exclude TYPE_CHECKING blocks from test coverage
juhoinkinen Jun 1, 2023
7ec0b73
Narrow down TokenSet tokens type to np.ndarray only
juhoinkinen Jun 1, 2023
fd19c67
Use int instead of int | np.int32
juhoinkinen Jun 1, 2023
486b48f
Move imports needed only for type checking to TYPE_CHECKING blocks
juhoinkinen Jun 1, 2023
6e92123
Restore accidentally removed annif.suggestion import
juhoinkinen Jun 2, 2023
252c75f
Make type optional as it should be
juhoinkinen Jun 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions annif/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
#!/usr/bin/env python3

from __future__ import annotations

import logging
import os
import os.path
from typing import TYPE_CHECKING, Optional

logging.basicConfig()
logger = logging.getLogger("annif")
logger.setLevel(level=logging.INFO)


import annif.backend # noqa

if TYPE_CHECKING:
from flask.app import Flask


def create_flask_app(config_name=None):
def create_flask_app(config_name: Optional[str] = None) -> Flask:
"""Create a Flask app to be used by the CLI."""
from flask import Flask

Expand All @@ -23,7 +30,7 @@ def create_flask_app(config_name=None):
return app


def create_app(config_name=None):
def create_app(config_name: Optional[str] = None) -> Flask:
"""Create a Connexion app to be used for the API."""
# 'cxapp' here is the Connexion application that has a normal Flask app
# as a property (cxapp.app)
Expand Down Expand Up @@ -60,7 +67,7 @@ def create_app(config_name=None):
return cxapp.app


def _get_config_name(config_name):
def _get_config_name(config_name: Optional[str]) -> str:
if config_name is None:
config_name = os.environ.get("ANNIF_CONFIG")
if config_name is None:
Expand Down
7 changes: 6 additions & 1 deletion annif/analyzer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
"""Collection of language-specific analyzers and analyzer registry for Annif"""
from __future__ import annotations

import re
from typing import TYPE_CHECKING

import annif
from annif.util import parse_args

from . import simple, simplemma, snowball

if TYPE_CHECKING:
from annif.analyzer.analyzer import Analyzer

_analyzers = {}


def register_analyzer(analyzer):
_analyzers[analyzer.name] = analyzer


def get_analyzer(analyzerspec):
def get_analyzer(analyzerspec: str) -> Analyzer:
match = re.match(r"(\w+)(\((.*)\))?", analyzerspec)
if match is None:
raise ValueError("Invalid analyzer specification {}".format(analyzerspec))
Expand Down
10 changes: 6 additions & 4 deletions annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Common functionality for analyzers."""
from __future__ import annotations

import abc
import functools
import unicodedata
from typing import List

_KEY_TOKEN_MIN_LENGTH = "token_min_length"

Expand All @@ -15,18 +17,18 @@ class Analyzer(metaclass=abc.ABCMeta):
name = None
token_min_length = 3 # default value, can be overridden in instances

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
if _KEY_TOKEN_MIN_LENGTH in kwargs:
self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])

def tokenize_sentences(self, text):
def tokenize_sentences(self, text: str) -> List[str]:
"""Tokenize a piece of text (e.g. a document) into sentences."""
import nltk.tokenize

return nltk.tokenize.sent_tokenize(text)

@functools.lru_cache(maxsize=50000)
def is_valid_token(self, word):
def is_valid_token(self, word: str) -> bool:
"""Return True if the word is an acceptable token."""
if len(word) < self.token_min_length:
return False
Expand All @@ -36,7 +38,7 @@ def is_valid_token(self, word):
return True
return False

def tokenize_words(self, text, filter=True):
def tokenize_words(self, text: str, filter: bool = True) -> List[str]:
"""Tokenize a piece of text (e.g. a sentence) into words. If
filter=True (default), only return valid tokens (e.g. not
punctuation, numbers or very short words)"""
Expand Down
5 changes: 3 additions & 2 deletions annif/analyzer/simple.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Simple analyzer for Annif. Only folds words to lower case."""
from __future__ import annotations

from . import analyzer


class SimpleAnalyzer(analyzer.Analyzer):
name = "simple"

def __init__(self, param, **kwargs):
def __init__(self, param: None, **kwargs) -> None:
self.param = param
super().__init__(**kwargs)

def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
return word.lower()
5 changes: 3 additions & 2 deletions annif/analyzer/simplemma.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Simplemma analyzer for Annif, based on simplemma lemmatizer."""
from __future__ import annotations

import simplemma

Expand All @@ -8,9 +9,9 @@
class SimplemmaAnalyzer(analyzer.Analyzer):
name = "simplemma"

def __init__(self, param, **kwargs):
def __init__(self, param: str, **kwargs) -> None:
self.lang = param
super().__init__(**kwargs)

def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
return simplemma.lemmatize(word, lang=self.lang)
5 changes: 3 additions & 2 deletions annif/analyzer/snowball.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Snowball analyzer for Annif, based on nltk Snowball stemmer."""
from __future__ import annotations

import functools

Expand All @@ -8,13 +9,13 @@
class SnowballAnalyzer(analyzer.Analyzer):
name = "snowball"

def __init__(self, param, **kwargs):
def __init__(self, param: str, **kwargs) -> None:
self.param = param
import nltk.stem.snowball

self.stemmer = nltk.stem.snowball.SnowballStemmer(param)
super().__init__(**kwargs)

@functools.lru_cache(maxsize=500000)
def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
return self.stemmer.stem(word.lower())
7 changes: 5 additions & 2 deletions annif/analyzer/spacy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""spaCy analyzer for Annif which uses spaCy for lemmatization"""
from __future__ import annotations

from typing import List

import annif.util
from annif.exception import OperationFailedException
Expand All @@ -11,7 +14,7 @@
class SpacyAnalyzer(analyzer.Analyzer):
name = "spacy"

def __init__(self, param, **kwargs):
def __init__(self, param: str, **kwargs) -> None:
import spacy

self.param = param
Expand All @@ -28,7 +31,7 @@ def __init__(self, param, **kwargs):
self.lowercase = False
super().__init__(**kwargs)

def tokenize_words(self, text, filter=True):
def tokenize_words(self, text: str, filter: bool = True) -> List[str]:
lemmas = [
lemma
for lemma in (token.lemma_ for token in self.nlp(text.strip()))
Expand Down
8 changes: 5 additions & 3 deletions annif/analyzer/voikko.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Voikko analyzer for Annif, based on libvoikko library."""
from __future__ import annotations

import functools
from typing import Dict, Optional

import voikko.libvoikko

Expand All @@ -10,20 +12,20 @@
class VoikkoAnalyzer(analyzer.Analyzer):
name = "voikko"

def __init__(self, param, **kwargs):
def __init__(self, param: str, **kwargs) -> None:
self.param = param
self.voikko = None
super().__init__(**kwargs)

def __getstate__(self):
def __getstate__(self) -> Dict[str, Optional[str]]:
"""Return the state of the object for pickling purposes. The Voikko
instance is set to None because as a ctypes object it cannot be
pickled."""

return {"param": self.param, "voikko": None}

@functools.lru_cache(maxsize=500000)
def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
if self.voikko is None:
self.voikko = voikko.libvoikko.Voikko(self.param)
result = self.voikko.analyze(word)
Expand Down
32 changes: 19 additions & 13 deletions annif/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
"""Registry of backend types for Annif"""
from __future__ import annotations

from typing import TYPE_CHECKING, Type

if TYPE_CHECKING:
from annif.backend.backend import AnnifBackend


# define functions for lazily importing each backend (alphabetical order)
def _dummy():
def _dummy() -> Type[AnnifBackend]:
from . import dummy

return dummy.DummyBackend


def _ensemble():
def _ensemble() -> Type[AnnifBackend]:
from . import ensemble

return ensemble.EnsembleBackend


def _fasttext():
def _fasttext() -> Type[AnnifBackend]:
try:
from . import fasttext

Expand All @@ -23,19 +29,19 @@ def _fasttext():
raise ValueError("fastText not available, cannot use fasttext backend")


def _http():
def _http() -> Type[AnnifBackend]:
from . import http

return http.HTTPBackend


def _mllm():
def _mllm() -> Type[AnnifBackend]:
from . import mllm

return mllm.MLLMBackend


def _nn_ensemble():
def _nn_ensemble() -> Type[AnnifBackend]:
try:
from . import nn_ensemble

Expand All @@ -46,7 +52,7 @@ def _nn_ensemble():
)


def _omikuji():
def _omikuji() -> Type[AnnifBackend]:
try:
from . import omikuji

Expand All @@ -55,13 +61,13 @@ def _omikuji():
raise ValueError("Omikuji not available, cannot use omikuji backend")


def _pav():
def _pav() -> Type[AnnifBackend]:
from . import pav

return pav.PAVBackend


def _stwfsa():
def _stwfsa() -> Type[AnnifBackend]:
try:
from . import stwfsa

Expand All @@ -70,19 +76,19 @@ def _stwfsa():
raise ValueError("STWFSA not available, cannot use stwfsa backend")


def _svc():
def _svc() -> Type[AnnifBackend]:
from . import svc

return svc.SVCBackend


def _tfidf():
def _tfidf() -> Type[AnnifBackend]:
from . import tfidf

return tfidf.TFIDFBackend


def _yake():
def _yake() -> Type[AnnifBackend]:
try:
from . import yake

Expand All @@ -108,7 +114,7 @@ def _yake():
}


def get_backend(backend_id):
def get_backend(backend_id: str) -> Type[AnnifBackend]:
if backend_id in _backend_fns:
return _backend_fns[backend_id]()
else:
Expand Down
Loading