[update] Refactor Sapphire class

- Add '__call__' method
- Add some docstrings
- Update README in accordance with the above changes
m-yoshinaka · Oct 5, 2020 · 9a4758f · 9a4758f
1 parent 24eb132
commit 9a4758f
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -87,7 +87,7 @@ If you change the hyper-parameters,
 After preparing a **tokenized** sentence pair
 (`tokenized_sentence_a: list` and `tokenized_sentence_b: list`),
 ```
->>> _, alignment = aligner.align(tokenized_sentence_a, tokenized_sentence_b)
+>>> _, alignment = aligner(tokenized_sentence_a, tokenized_sentence_b)
 >>> alignment
 [(1, 3, 2, 3), (8, 9, 5, 6), (13, 13, 8, 8), (27, 27, 9, 9)]
 ```

diff --git a/run_sapphire.py b/run_sapphire.py
@@ -32,7 +32,7 @@ def run_sapphire(args):
 
             tokens_src = sentence_src.split()
             tokens_trg = sentence_trg.split()
-            _, alignment = aligner.align(tokens_src, tokens_trg)
+            _, alignment = aligner(tokens_src, tokens_trg)
 
             print('{:-^48}'.format(' Result '))
             print('{0:^24}{1:^24}'.format('Sentence A', 'Sentence B'))

diff --git a/sapphire/sapphire.py b/sapphire/sapphire.py
@@ -5,6 +5,29 @@
 
 
 class Sapphire(object):
+    """
+    SAPPHIRE : monolingual phrase aligner
+
+    Attributes
+    ----------
+    vectorizer : FastTextVectorize
+        Vectorize words using fastText (Bojanowski et al., 2017).
+    word_aligner : WordAlign
+        Align words in two sentences.
+    extractor : PhraseExtract
+        Extract phrase pairs in two sentences based on word alignment and
+        calculate alignment scores of phrase pairs.
+    phrase_aligner : PhraseAlign
+        Search for a phrase alignment with the highest total alignment score.
+
+    Methods
+    -------
+    set_params(lambda_=0.6, delta=0.6, alpha=0.01, hungarian=False)
+        Set hyper-parameters of SAPPHIRE.
+    align(tokens_src, tokens_trg)
+        Get word alignment and phrase alignment.
+
+    """
 
     def __init__(self, model):
         self.vectorizer = FastTextVectorize(model)
@@ -13,7 +36,26 @@ def __init__(self, model):
         self.extractor = PhraseExtract(self.delta, self.alpha)
         self.phrase_aligner = PhraseAlign()
 
+    def __call__(self, tokens_src, tokens_trg):
+        return self.align(tokens_src, tokens_trg)
+
     def set_params(self, lambda_=0.6, delta=0.6, alpha=0.01, hungarian=False):
+        """
+        Set hyper-parameters of SAPPHIRE.
+        Details are discussed in the following paper:
+        https://www.aclweb.org/anthology/2020.lrec-1.847/ .
+
+        Parameters
+        ----------
+        lambda_ : float
+            Prunes word alignment candidates.
+        delta : float
+            Prunes phrase alignment candidates.
+        alpha : float
+            Biases the phrase alignment score based on the lengths of phrases.
+        hungarian : bool
+            Whether to use the extended Hungarian method to get word alignment.
+        """
         self.lambda_ = lambda_
         self.delta = delta
         self.alpha = alpha

diff --git a/setup.py b/setup.py
@@ -11,7 +11,7 @@ def read_requirements():
 
 setup(
     name='sapphire',
-    version='0.1.1',
+    version='0.2.0',
     description='Simple Aligner for Phrasal Paraphrase \
                  with Hierarchical Representation',
     author='Masato Yoshinaka',