diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp index f551202137..a07c934e8e 100644 --- a/src/ccmain/tessedit.cpp +++ b/src/ccmain/tessedit.cpp @@ -23,6 +23,8 @@ # include "config_auto.h" #endif +#include <regex> // for std::regex_match + #include "control.h" #include "matchdefs.h" #include "pageres.h" @@ -247,6 +249,15 @@ static bool IsStrInList(const std::string &str, const std::vector<std::string> & void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std::string> *to_load, std::vector<std::string> *not_to_load) { std::string remains(lang_str); + // Look whether the model file uses a prefix which must be applied to + // included model files as well. + std::regex e("(.*)/[^/]*"); + std::cmatch cm; + std::string prefix; + if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) { + // A prefix was found. + prefix = cm[1].str() + "/"; + } while (!remains.empty()) { // Find the start of the lang code and which vector to add to. const char *start = remains.c_str(); @@ -268,6 +279,7 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vectorpush_back(lang_code); @@ -296,7 +308,10 @@ int Tesseract::init_tesseract(const std::string &arg0, const std::string &textba // Add any languages that this language requires bool loaded_primary = false; // Load the rest into sub_langs_. - for (auto &lang_to_load : langs_to_load) { + // A range based for loop does not work here because langs_to_load + // might be changed in the loop when a new submodel is found. + for (size_t lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { + auto &lang_to_load = langs_to_load[lang_index]; if (!IsStrInList(lang_to_load, langs_not_to_load)) { const char *lang_str = lang_to_load.c_str(); Tesseract *tess_to_init;