Skip to content

Commit

Permalink
[update] Refactor Sapphire class
Browse files Browse the repository at this point in the history
- Add '__call__' method
- Add some docstrings
- Update README in accordance with the above changes
  • Loading branch information
m-yoshinaka committed Oct 5, 2020
1 parent 24eb132 commit 9a4758f
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ If you change the hyper-parameters,
After preparing a **tokenized** sentence pair
(`tokenized_sentence_a: list` and `tokenized_sentence_b: list`),
```
>>> _, alignment = aligner.align(tokenized_sentence_a, tokenized_sentence_b)
>>> _, alignment = aligner(tokenized_sentence_a, tokenized_sentence_b)
>>> alignment
[(1, 3, 2, 3), (8, 9, 5, 6), (13, 13, 8, 8), (27, 27, 9, 9)]
```
Expand Down
2 changes: 1 addition & 1 deletion run_sapphire.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def run_sapphire(args):

tokens_src = sentence_src.split()
tokens_trg = sentence_trg.split()
_, alignment = aligner.align(tokens_src, tokens_trg)
_, alignment = aligner(tokens_src, tokens_trg)

print('{:-^48}'.format(' Result '))
print('{0:^24}{1:^24}'.format('Sentence A', 'Sentence B'))
Expand Down
42 changes: 42 additions & 0 deletions sapphire/sapphire.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,29 @@


class Sapphire(object):
"""
SAPPHIRE : monolingual phrase aligner
Attributes
----------
vectorizer : FastTextVectorize
Vectorize words using fastText (Bojanowski et al., 2017).
word_aligner : WordAlign
Align words in two sentences.
extractor : PhraseExtract
Extract phrase pairs in two sentences based on word alignment and
calculate alignment scores of phrase pairs.
phrase_aligner : PhraseAlign
Search for a phrase alignment with the highest total alignment score.
Methods
-------
set_params(lambda_=0.6, delta=0.6, alpha=0.01, hungarian=False)
Set hyper-parameters of SAPPHIRE.
align(tokens_src, tokens_trg)
Get word alignment and phrase alignment.
"""

def __init__(self, model):
self.vectorizer = FastTextVectorize(model)
Expand All @@ -13,7 +36,26 @@ def __init__(self, model):
self.extractor = PhraseExtract(self.delta, self.alpha)
self.phrase_aligner = PhraseAlign()

def __call__(self, tokens_src, tokens_trg):
return self.align(tokens_src, tokens_trg)

def set_params(self, lambda_=0.6, delta=0.6, alpha=0.01, hungarian=False):
"""
Set hyper-parameters of SAPPHIRE.
Details are discussed in the following paper:
https://www.aclweb.org/anthology/2020.lrec-1.847/ .
Parameters
----------
lambda_ : float
Prunes word alignment candidates.
delta : float
Prunes phrase alignment candidates.
alpha : float
Biases the phrase alignment score based on the lengths of phrases.
hungarian : bool
Whether to use the extended Hungarian method to get word alignment.
"""
self.lambda_ = lambda_
self.delta = delta
self.alpha = alpha
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def read_requirements():

setup(
name='sapphire',
version='0.1.1',
version='0.2.0',
description='Simple Aligner for Phrasal Paraphrase \
with Hierarchical Representation',
author='Masato Yoshinaka',
Expand Down

0 comments on commit 9a4758f

Please sign in to comment.