Skip to content

Commit

Permalink
fix(language): fix language and subtitle_languages in some situat…
Browse files Browse the repository at this point in the history
…ions (#696)
  • Loading branch information
Toilal committed Apr 30, 2021
1 parent a679a6c commit f19cfda
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
6 changes: 4 additions & 2 deletions guessit/config/options.json
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,15 @@
],
"mul": [
"multi",
"multiple",
"dl"
]
},
"subtitle_affixes": [
"sub",
"subs",
"subtitle",
"subtitles",
"esub",
"esubs",
"subbed",
Expand All @@ -406,8 +409,7 @@
"legendas",
"legendado",
"subtitulado",
"soft",
"subtitles"
"soft"
],
"subtitle_suffixes": [
"subforced",
Expand Down
29 changes: 25 additions & 4 deletions guessit/rules/properties/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround
from ..common.words import iter_words


def language(config, common_words):
Expand Down Expand Up @@ -64,7 +64,8 @@ def find_languages(string, context=None):
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))
RemoveInvalidLanguages(common_words),
RemoveUndeterminedLanguages)

babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

Expand Down Expand Up @@ -226,7 +227,7 @@ def find(self, string):
key = match.property_name
if match.lang == UNDETERMINED:
undetermined_map[key].add(match)
elif match.lang == 'mul':
elif match.lang == MULTIPLE:
multi_map[key].add(match)
else:
regular_lang_map[key].add(match)
Expand Down Expand Up @@ -291,7 +292,7 @@ def iter_matches_for_candidate(self, language_word):
if match:
yield match

def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): # pylint:disable=inconsistent-return-statements
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):
"""
Return the language match for the given word and affixes.
"""
Expand Down Expand Up @@ -322,6 +323,7 @@ def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affi

if match:
return match
return None

def find_language_match_for_word(self, word, key='language'): # pylint:disable=inconsistent-return-statements
"""
Expand Down Expand Up @@ -508,3 +510,22 @@ def when(self, matches, context):
to_remove.append(match)

return to_remove


class RemoveUndeterminedLanguages(Rule):
    """Remove "und" language matches when a neighbouring match is a language."""

    consequence = RemoveMatch
    priority = 32

    def when(self, matches, context):
        """
        Collect the "und" matches that sit next to a 'language' match.

        Every 'language' or 'subtitle_language' match of the input string is
        inspected. One whose value equals "und" is selected for removal when
        the match returned by ``matches.previous`` or by ``matches.next`` is
        named 'language'.

        :param matches: matches container for the current input string
        :param context: rule context (not used here)
        :return: list of matches to remove (empty when nothing qualifies)
        """
        def is_language_property(candidate):
            return candidate.name in ('language', 'subtitle_language')

        def is_language(neighbour):
            # Keep the truthiness test: a missing neighbour must not be dereferenced.
            return neighbour and neighbour.name == 'language'

        discarded = []
        input_length = len(matches.input_string)
        for candidate in matches.range(0, input_length, predicate=is_language_property):
            if candidate.value != "und":
                continue

            before = matches.previous(candidate, index=0)
            after = matches.next(candidate, index=0)
            if is_language(before) or is_language(after):
                discarded.append(candidate)

        return discarded
10 changes: 10 additions & 0 deletions guessit/test/episodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4715,3 +4715,13 @@
container: mkv
mimetype: video/x-matroska
type: episode

? '[Erai-raws] Fumetsu no Anata e - 03 [720p][Multiple Subtitle].mkv'
: release_group: Erai-raws
title: Fumetsu no Anata e
episode: 3
screen_size: 720p
subtitle_language: mul
container: mkv
mimetype: video/x-matroska
type: episode

0 comments on commit f19cfda

Please sign in to comment.