Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smarter initialization of optional analyzers #820

Merged
merged 2 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions annif/analyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import annif
from annif.util import parse_args

from . import simple, simplemma, snowball
from . import simple, simplemma, snowball, spacy, voikko

if TYPE_CHECKING:
from annif.analyzer.analyzer import Analyzer
Expand All @@ -17,7 +17,10 @@


def register_analyzer(analyzer: "type[Analyzer]") -> None:
    """Register an analyzer class under its ``name`` if it is usable.

    The class is stored in the ``_analyzers`` registry only when its
    ``is_available()`` check passes. Otherwise it is left out of the registry
    and a debug message is logged instead.
    """
    if analyzer.is_available():
        _analyzers[analyzer.name] = analyzer
    else:
        annif.logger.debug(f"{analyzer.name} analyzer not available, not enabling it")


def get_analyzer(analyzerspec: str) -> Analyzer:
Expand All @@ -37,18 +40,5 @@ def get_analyzer(analyzerspec: str) -> Analyzer:
register_analyzer(simple.SimpleAnalyzer)
register_analyzer(snowball.SnowballAnalyzer)
register_analyzer(simplemma.SimplemmaAnalyzer)
# Optional analyzers: register_analyzer() consults each class's
# is_available() check, so no guarded imports are needed here and each
# analyzer is registered at most once.
register_analyzer(voikko.VoikkoAnalyzer)
register_analyzer(spacy.SpacyAnalyzer)
5 changes: 5 additions & 0 deletions annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class Analyzer(metaclass=abc.ABCMeta):
name = None
token_min_length = 3 # default value, can be overridden in instances

@staticmethod
def is_available() -> bool:
"""Return True if the analyzer is available for use, False if not."""
return True # can be overridden in implementations if necessary

def __init__(self, **kwargs) -> None:
if _KEY_TOKEN_MIN_LENGTH in kwargs:
self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])
Expand Down
7 changes: 7 additions & 0 deletions annif/analyzer/spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

import importlib

import annif.util
from annif.exception import OperationFailedException

Expand All @@ -13,6 +15,11 @@
class SpacyAnalyzer(analyzer.Analyzer):
name = "spacy"

@staticmethod
def is_available() -> bool:
# return True iff spaCy is installed
return importlib.util.find_spec("spacy") is not None

def __init__(self, param: str, **kwargs) -> None:
import spacy

Expand Down
10 changes: 8 additions & 2 deletions annif/analyzer/voikko.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@
from __future__ import annotations

import functools

import voikko.libvoikko
import importlib

from . import analyzer


class VoikkoAnalyzer(analyzer.Analyzer):
name = "voikko"

@staticmethod
def is_available() -> bool:
# return True iff Voikko is installed
return importlib.util.find_spec("voikko") is not None

def __init__(self, param: str, **kwargs) -> None:
self.param = param
self.voikko = None
Expand All @@ -26,6 +30,8 @@ def __getstate__(self) -> dict[str, str | None]:

@functools.lru_cache(maxsize=500000)
def _normalize_word(self, word: str) -> str:
import voikko.libvoikko

if self.voikko is None:
self.voikko = voikko.libvoikko.Voikko(self.param)
result = self.voikko.analyze(word)
Expand Down
5 changes: 4 additions & 1 deletion tests/test_analyzer_spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import pytest

import annif.analyzer
import annif.analyzer.spacy
from annif.exception import OperationFailedException

spacy = pytest.importorskip("spacy")
pytestmark = pytest.mark.skipif(
not annif.analyzer.spacy.SpacyAnalyzer.is_available(), reason="spaCy is required"
)


def test_spacy_model_not_found():
Expand Down
5 changes: 4 additions & 1 deletion tests/test_analyzer_voikko.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import pytest

import annif.analyzer
import annif.analyzer.voikko

voikko = pytest.importorskip("annif.analyzer.voikko")
pytestmark = pytest.mark.skipif(
not annif.analyzer.voikko.VoikkoAnalyzer.is_available(), reason="voikko is required"
)


def test_voikko_getstate():
Expand Down