Skip to content

Commit

Permalink
Fix fatal error when static detection with restricted subsets (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
landrok committed Apr 9, 2021
1 parent 3716cbc commit 74f62a2
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 6 deletions.
13 changes: 8 additions & 5 deletions src/LanguageDetector/LanguageDetector.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,17 @@ public function evaluate(string $text): self
*/
public static function detect(string $text, array $languages = []): self
{
// All specified models have been loaded
// Current loaded models
$current = !is_null(self::$detector)
? self::$detector->getLanguages()
: [];

// Differential between currently loaded and specified models
$diff = count($languages)
? array_diff(
self::$detector->getLanguages(),
$languages
)
? array_diff($current, $languages)
: [];

// Specified models need to be reloaded
if (is_null(self::$detector) || count($diff)) {
self::$detector = new self(null, $languages);
}
Expand Down
107 changes: 106 additions & 1 deletion tests/LanguageDetector/LanguageDetectionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,66 @@

class LanguageDetectionTest extends TestCase
{
/**
 * Language codes the tests may draw from when building a random,
 * restricted list of subsets for static detection.
 *
 * NOTE(review): presumably mirrors the subsets bundled with the
 * library - confirm when a language is added or removed.
 *
 * @var string[]
 */
protected $availableLanguages = [
'th',
'mr',
'sk',
'pa',
'ru',
'el',
'ar',
'ta',
'hr',
'sq',
'ja',
'cs',
'bn',
'tl',
'i-klingon',
'it',
'ro',
'lt',
'hi',
'id',
'bg',
'lv',
'gu',
'he',
'en',
'vi',
'sv',
'ne',
'et',
'hu',
'af',
'pt',
'de',
'es',
'zh-tw',
'mk',
'pl',
'sl',
'so',
'zh-cn',
'uk',
'nl',
'no',
'ur',
'da',
'kn',
'ko',
'te',
'fr',
'sw',
'ml',
'fi',
'tr',
'fa',
];

/**
* Valid scenarios provider
*/
Expand Down Expand Up @@ -159,7 +219,7 @@ public function testGetText()
}

/**
* __construct() method
* Tests __construct() method
*/
public function testLanguageRestriction()
{
Expand All @@ -185,4 +245,49 @@ public function testLanguageRestriction()
$detector->getLanguage()
);
}

/**
 * Tests quality of the language detection with static calls and
 * a restriction in tested subsets.
 *
 * - Randomly pick a subset size between 5 and 10
 * - Always include the expected language
 * - Fill the remaining slots with distinct, randomly chosen languages
 *
 * @dataProvider getLanguageDetectionScenarios
 *
 * @param string $expected Language expected for the scenario (may be an
 *                         empty string, see the clean-up step below)
 * @param string $string   Text handed to LanguageDetector::detect()
 * @param string $message  Message reported when an assertion fails
 */
public function testStaticDetectionReliabilityWithRestrictedNumberOfSubsets($expected, $string, $message)
{
    // Prepare a random set of languages that always contains the expected one
    $numberOfSubsets = rand(5, 10);

    // Shuffle every other language once and take what is needed: this
    // yields distinct picks in a bounded number of steps, instead of
    // re-drawing until an unused language happens to come up.
    $candidates = array_values(array_diff($this->availableLanguages, [$expected]));
    shuffle($candidates);

    $allowed = array_merge(
        [$expected],
        array_slice($candidates, 0, $numberOfSubsets - 1)
    );
    sort($allowed);

    $detector = LanguageDetector::detect($string, $allowed);

    // Right language detected
    $this->assertEquals(
        $expected,
        $detector,
        $message
    );

    // Remove not allowed language: an empty expected value sorts first
    // and must be dropped before comparing against the loaded models
    if ($allowed[0] === '') {
        array_shift($allowed);
    }

    // Right dataset
    $this->assertEquals(
        $allowed,
        $detector->getLanguages(),
        $message
    );
}
}

0 comments on commit 74f62a2

Please sign in to comment.