Skip to content

Commit

Permalink
Improve some translations.
Browse files Browse the repository at this point in the history
  • Loading branch information
jsichi committed Feb 3, 2021
1 parent 4067fe5 commit 9612644
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 72 deletions.
18 changes: 16 additions & 2 deletions cli/src/test/resources/expect/phlebotinum-spanish-script.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@ Tú estás en una habitación.
// truly lovely automatic translation
Artículos agusanado de historia ajeno underwear se disiparon aproximadamente.

Tú ves una puerta grande al este.
Tú ves una puerta grande al oriente.

> l

OK.

Tú estás en la habitación.

Artículos agusanado de historia ajeno underwear se disiparon aproximadamente.

Tú ves la puerta grande al oriente.

> x la puerta

Expand Down Expand Up @@ -51,6 +61,10 @@ No se puede ir por ese camino.

OK.

> i

Tú llevas el hacha.

> tira el hacha hacia el ladrón

OK.
Expand All @@ -67,7 +81,7 @@ OK.

Tú estás en la habitación.

Tú ves la puerta grande al este.
Tú ves la puerta grande al oriente.

> dónde está el hacha?

Expand Down
149 changes: 79 additions & 70 deletions src/main/scala/com/lingeringsocket/shlurd/nlang/SnlTranslator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -118,39 +118,91 @@ class SnlTranslator(
output
}

// Chooses one lemma to stand for the given senses and wraps it in a new
// word tagged with the sense id that targetWordnet reports for them.
//
// word   : the word being replaced; returned unchanged when there are no
//          senses to choose from
// senses : candidate senses whose lemmas are considered
// pos    : part of speech handed to targetTongue.chooseVariant when no
//          single lemma wins on frequency
//
// A lemma which appears in more senses than any other wins outright;
// otherwise the choice is delegated to targetTongue.chooseVariant.
private def chooseLemma(
  word : SilWord,
  senses : Seq[Synset],
  pos : POS) : SilWord =
{
  // safe on empty strings, unlike _.head.isUpper
  def startsUpper(lemma : String) : Boolean =
    lemma.headOption.exists(_.isUpper)

  if (senses.isEmpty) {
    // Callers may pass sortedSenses.take(1), which is empty when the
    // wordnet lookup finds nothing; there is no lemma to pick then, so
    // keep the word as it is instead of failing on max/head below.
    word
  } else {
    // FIXME what about compound words? Also should maybe
    // only preserve senses with the same lemma?
    val mixedLemmas = senses.map(
      _.getWords.asScala.map(_.getLemma))
    // drop lemmas starting with an uppercase letter (presumably proper
    // names), unless that would leave no lemma in any sense
    val filteredLemmas = mixedLemmas.map(seq => {
      seq.filterNot(startsUpper)
    })
    val senseLemmas = {
      if (filteredLemmas.exists(_.nonEmpty)) {
        filteredLemmas
      } else {
        mixedLemmas
      }
    }
    val lemmas = senseLemmas.flatten.distinct.filterNot(_.startsWith("`"))
    // FIXME optimize computation
    // for each lemma, the number of senses which list it
    val lemmaCounts = lemmas.map(lemma => {
      tupleN(lemma, senseLemmas.count(_.contains(lemma)))
    }).toMap
    val maxLemmas = {
      if (lemmaCounts.isEmpty) {
        // every lemma was backquoted; fall through to chooseVariant
        lemmaCounts
      } else {
        val maxCount = lemmaCounts.values.max
        lemmaCounts.filter(_._2 == maxCount)
      }
    }
    val lemma = {
      if (maxLemmas.size == 1) {
        maxLemmas.head._1
      } else {
        // no unique winner: let the target tongue pick among the lemmas
        // of the first sense which has a lemma not starting in uppercase
        val candidateLemmas = mixedLemmas.find(
          _.exists(lemma => !startsUpper(lemma))).getOrElse(mixedLemmas.head)
        targetTongue.chooseVariant(pos, candidateLemmas)
      }
    }
    SilWord("", lemma, targetWordnet.getSenseId(senses))
  }
}

// Produces the replacement for a word given its source senses: the senses
// are mapped through the alignment in the current direction, and a lemma
// is chosen for whatever senses come out the other side.
private def chooseWord(
  word : SilWord,
  senses : Seq[Synset],
  pos : POS) : SilWord =
{
  // mapping an empty sequence yields an empty sequence, so a single
  // emptiness check covers both "no source senses" and "nothing aligned"
  val targetSenses = senses.flatMap(synset => {
    alignment.mapSense(synset, direction)
  }).distinct
  if (targetSenses.nonEmpty) {
    chooseLemma(word, targetSenses, pos)
  } else {
    // we must discard original sense identifiers, since they aren't
    // relevant in the target tongue
    word.withSense("")
  }
}

// Re-ranks the senses of an action predicate's verb.  Each sense returned
// by targetWordnet.findSenses for the verb's sense id is substituted into
// a copy of the sentence and scored with scorer.computeGlobalScore; the
// senses are then ordered from highest to lowest score and the predicate
// is returned with a new verb word.  A predicate whose verb has an empty
// sense id is returned as is.
// NOTE(review): this span is rendered from a commit diff, so removed lines
// (the newVerb helper and its call sites) appear directly next to their
// replacements (the chooseLemma calls) -- confirm against the real file.
private def reorderActionSenses(
ps : SilPredicateSentence,
ap : SilActionPredicate,
scorer : SilPhraseScorer) : SilActionPredicate =
{
def newVerb(senses : Seq[Synset]) = {
val lemmas = senses.head.getWords.asScala.map(_.getLemma)
val lemma = targetTongue.chooseVariant(POS.VERB, lemmas)
SilWord("", lemma, targetWordnet.getSenseId(senses))
}
if (ap.verb.senseId.isEmpty) {
val verb = ap.verb
if (verb.senseId.isEmpty) {
ap
} else {
val sortedSenses = targetWordnet.findSenses(
ap.verb.senseId
verb.senseId
).map(sense => {
// score the sentence as it would read with this one sense's verb
val pred = ap.withNewWord(newVerb(Seq(sense)))
val pred = ap.withNewWord(chooseLemma(verb, Seq(sense), POS.VERB))
val score = scorer.computeGlobalScore(ps.copy(predicate = pred))
tupleN(sense, score)
// ascending sort followed by reverse puts the best score first
}).sortBy(_._2).reverse.map(_._1)
ap.withNewWord(newVerb(sortedSenses))
// take(1) keeps only the top-scoring sense (empty if none were found)
ap.withNewWord(chooseLemma(verb, sortedSenses.take(1), POS.VERB))
}
}

private def reorderNounSenses(
ref : SilNounReference,
scorer : SilPhraseScorer) : SilNounReference =
{
def newNoun(senses : Seq[Synset]) = {
val lemmas = senses.head.getWords.asScala.map(_.getLemma)
val lemma = targetTongue.chooseVariant(POS.NOUN, lemmas)
SilWord("", lemma, targetWordnet.getSenseId(senses))
}
val noun = ref.noun
if (noun.senseId.isEmpty) {
ref
Expand All @@ -162,7 +214,7 @@ class SnlTranslator(
val testSentence = SilPredicateSentence(
SilStatePredicate(
annotator.determinedNounRef(
newNoun(Seq(sense)),
chooseLemma(noun, Seq(sense), POS.NOUN),
DETERMINER_DEFINITE,
COUNT_PLURAL),
SprPredefWord(PD_EXIST)(targetTongue).toUninflected,
Expand All @@ -172,70 +224,27 @@ class SnlTranslator(
val score = scorer.computeGlobalScore(testSentence)
tupleN(sense, score)
}).sortBy(_._2).reverse.map(_._1)
ref.withNewWord(newNoun(sortedSenses))
ref.withNewWord(chooseLemma(noun, sortedSenses.take(1), POS.NOUN))
}
}

private def translateSense(phrase : SilPhrase) =
{
val word = phrase.maybeWord.get
val senses = sourceWordnet.findSenses(word.senseId)
val translatedWord = {
if (senses.isEmpty) {
word.withSense("")
} else {
val pos = senses.head.getPOS
val translatedSenses = senses.flatMap(synset => {
alignment.mapSense(synset, direction)
})
if (translatedSenses.isEmpty) {
// we must discard original sense identifiers, since they aren't
// relevant in the target tongue
word.withSense("")
val translatedPhrase = {
val word = phrase.maybeWord.get
val senses = sourceWordnet.findSenses(word.senseId)
val translatedWord = {
if (senses.isEmpty) {
word
} else {
val translatedSenseId = targetWordnet.getSenseId(translatedSenses)
// FIXME what about compound words? Also should maybe
// only preserve senses with the same lemma?
val mixedLemmas = translatedSenses.map(
_.getWords.asScala.map(_.getLemma))
val filteredLemmas = mixedLemmas.filterNot(
_.exists(lemma => lemma.head.isUpper))
val senseLemmas = {
if (filteredLemmas.isEmpty) {
mixedLemmas
} else {
filteredLemmas
}
}
val lemmas = senseLemmas.flatten.distinct.filterNot(_.startsWith("`"))
// FIXME optimize computation
val lemmaCounts = lemmas.map(lemma => {
tupleN(lemma, senseLemmas.count(_.contains(lemma)))
}).toMap
val maxCount = lemmaCounts.values.max
val maxLemmas = lemmaCounts.filter(_._2 == maxCount)
val lemma = {
if (maxLemmas.size == 1) {
maxLemmas.head._1
} else {
val candidateLemmas = mixedLemmas.find(
_.exists(
lemma => !lemma.head.isUpper)).getOrElse(mixedLemmas.head)
targetTongue.chooseVariant(pos, candidateLemmas)
}
}
SilWord("", lemma, translatedSenseId)
chooseWord(word, senses, senses.head.getPOS)
}
}
phrase.withNewWord(translatedWord)
}
phrase.withNewWord(translatedWord) match {
case nr : SilNounReference => {
scorerOpt match {
case Some(scorer) => {
reorderNounSenses(nr, scorer)
}
case _ => nr
}
translatedPhrase match {
case nr : SilNounReference if (scorerOpt.nonEmpty) => {
reorderNounSenses(nr, scorerOpt.get)
}
case x => x
}
Expand Down

0 comments on commit 9612644

Please sign in to comment.