Skip to content

Commit

Permalink
Merge pull request #820 from NatLibFi/fix-optional-analyzer-use-importlib
Browse files Browse the repository at this point in the history

Smarter initialization of optional analyzers
  • Loading branch information
osma authored Nov 22, 2024
2 parents 4809561 + 419f8df commit d907024
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 21 deletions.
24 changes: 7 additions & 17 deletions annif/analyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import annif
from annif.util import parse_args

from . import simple, simplemma, snowball
from . import simple, simplemma, snowball, spacy, voikko

if TYPE_CHECKING:
from annif.analyzer.analyzer import Analyzer
Expand All @@ -17,7 +17,10 @@


def register_analyzer(analyzer):
_analyzers[analyzer.name] = analyzer
if analyzer.is_available():
_analyzers[analyzer.name] = analyzer
else:
annif.logger.debug(f"{analyzer.name} analyzer not available, not enabling it")


def get_analyzer(analyzerspec: str) -> Analyzer:
Expand All @@ -37,18 +40,5 @@ def get_analyzer(analyzerspec: str) -> Analyzer:
register_analyzer(simple.SimpleAnalyzer)
register_analyzer(snowball.SnowballAnalyzer)
register_analyzer(simplemma.SimplemmaAnalyzer)

# Optional analyzers
try:
from . import voikko

register_analyzer(voikko.VoikkoAnalyzer)
except ImportError:
annif.logger.debug("voikko not available, not enabling voikko analyzer")

try:
from . import spacy

register_analyzer(spacy.SpacyAnalyzer)
except ImportError:
annif.logger.debug("spaCy not available, not enabling spacy analyzer")
register_analyzer(voikko.VoikkoAnalyzer)
register_analyzer(spacy.SpacyAnalyzer)
5 changes: 5 additions & 0 deletions annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class Analyzer(metaclass=abc.ABCMeta):
name = None
token_min_length = 3 # default value, can be overridden in instances

@staticmethod
def is_available() -> bool:
    """Tell whether this analyzer can be used in the current environment.

    The base implementation always answers True; implementations can
    override it if necessary.
    """
    return True

def __init__(self, **kwargs) -> None:
if _KEY_TOKEN_MIN_LENGTH in kwargs:
self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])
Expand Down
7 changes: 7 additions & 0 deletions annif/analyzer/spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

import importlib

import annif.util
from annif.exception import OperationFailedException

Expand All @@ -13,6 +15,11 @@
class SpacyAnalyzer(analyzer.Analyzer):
name = "spacy"

@staticmethod
def is_available() -> bool:
    """Report whether the spaCy package can be found.

    Only the import machinery is consulted (``find_spec``); spaCy itself
    is not imported by this check.

    Returns:
        True if a module spec for ``spacy`` is found, False otherwise.
    """
    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.util`` is bound as an attribute, which
    # would make the lookup below fail with AttributeError.
    import importlib.util

    return importlib.util.find_spec("spacy") is not None

def __init__(self, param: str, **kwargs) -> None:
import spacy

Expand Down
10 changes: 8 additions & 2 deletions annif/analyzer/voikko.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@
from __future__ import annotations

import functools

import voikko.libvoikko
import importlib

from . import analyzer


class VoikkoAnalyzer(analyzer.Analyzer):
name = "voikko"

@staticmethod
def is_available() -> bool:
    """Report whether the ``voikko`` package can be found.

    Only the import machinery is consulted (``find_spec``); the package
    itself is not imported by this check.

    Returns:
        True if a module spec for ``voikko`` is found, False otherwise.
    """
    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.util`` is bound as an attribute, which
    # would make the lookup below fail with AttributeError.
    import importlib.util

    return importlib.util.find_spec("voikko") is not None

def __init__(self, param: str, **kwargs) -> None:
self.param = param
self.voikko = None
Expand All @@ -26,6 +30,8 @@ def __getstate__(self) -> dict[str, str | None]:

@functools.lru_cache(maxsize=500000)
def _normalize_word(self, word: str) -> str:
import voikko.libvoikko

if self.voikko is None:
self.voikko = voikko.libvoikko.Voikko(self.param)
result = self.voikko.analyze(word)
Expand Down
5 changes: 4 additions & 1 deletion tests/test_analyzer_spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import pytest

import annif.analyzer
import annif.analyzer.spacy
from annif.exception import OperationFailedException

spacy = pytest.importorskip("spacy")
pytestmark = pytest.mark.skipif(
not annif.analyzer.spacy.SpacyAnalyzer.is_available(), reason="spaCy is required"
)


def test_spacy_model_not_found():
Expand Down
5 changes: 4 additions & 1 deletion tests/test_analyzer_voikko.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import pytest

import annif.analyzer
import annif.analyzer.voikko

voikko = pytest.importorskip("annif.analyzer.voikko")
pytestmark = pytest.mark.skipif(
not annif.analyzer.voikko.VoikkoAnalyzer.is_available(), reason="voikko is required"
)


def test_voikko_getstate():
Expand Down

0 comments on commit d907024

Please sign in to comment.