Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make vocabularies multilingual #600

Merged
merged 12 commits into from
Aug 8, 2022
Prev Previous commit
Next Next commit
refactor vocabulary creation
  • Loading branch information
osma committed Aug 5, 2022
commit 820436873020dfb1c70660a9baa92898040404ef
15 changes: 3 additions & 12 deletions annif/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import enum
import os.path
import re
from shutil import rmtree
import annif
import annif.transform
Expand All @@ -14,7 +13,6 @@
from annif.datadir import DatadirMixin
from annif.exception import AnnifException, ConfigurationException, \
NotSupportedException, NotInitializedException
from annif.util import parse_args

logger = annif.logger

Expand Down Expand Up @@ -157,17 +155,10 @@ def vocab(self):
if self.vocab_spec is None:
raise ConfigurationException("vocab setting is missing",
project_id=self.project_id)
match = re.match(r'(\w+)(\((.*)\))?', self.vocab_spec)
if match is None:
raise ConfigurationException("vocab setting is invalid",
project_id=self.project_id)
vocab_id = match.group(1)
posargs, kwargs = parse_args(match.group(3))
language = posargs[0] if posargs else self.language
self._vocab = annif.vocab.get_vocab(self.vocab_spec,
self._base_datadir,
self.language)

self._vocab = annif.vocab.AnnifVocabulary(vocab_id,
self._base_datadir,
language)
return self._vocab

@property
Expand Down
13 changes: 13 additions & 0 deletions annif/vocab.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
"""Vocabulary management functionality for Annif"""

import os.path
import re
import annif
import annif.corpus
import annif.util
from annif.datadir import DatadirMixin
from annif.exception import NotInitializedException
from annif.util import parse_args

logger = annif.logger


def get_vocab(vocab_spec, datadir, default_language):
    """Create an AnnifVocabulary from a vocabulary specification string.

    The specification has the form ``vocab_id`` or ``vocab_id(args)``, where
    ``vocab_id`` consists of word characters and hyphens. The text inside the
    optional parentheses is handed to ``parse_args``; when it yields at least
    one positional argument, the first one is used as the vocabulary
    language, otherwise ``default_language`` is used.

    Raises ValueError if the specification does not match this form in its
    entirety (e.g. unbalanced parentheses or trailing characters).
    """
    # fullmatch (rather than match) rejects trailing garbage such as
    # "yso(fi" or "yso fi" instead of silently truncating the spec to "yso".
    match = re.fullmatch(r'([\w-]+)(\((.*)\))?', vocab_spec)
    if match is None:
        raise ValueError(f"Invalid vocabulary specification: {vocab_spec}")
    vocab_id = match.group(1)
    # Only positional arguments are used here; keyword arguments are ignored.
    posargs, _ = parse_args(match.group(3))
    language = posargs[0] if posargs else default_language

    return AnnifVocabulary(vocab_id, datadir, language)


class AnnifVocabulary(DatadirMixin):
"""Class representing a subject vocabulary which can be used by multiple
Annif projects."""
Expand Down