From 638045133f29052187b8b7b33c83d392c590bdb4 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 17 Sep 2021 08:24:50 +0200 Subject: [PATCH] Simplify function LoadTrainingData and fix mastertrainer_test Signed-off-by: Stefan Weil --- src/training/classifier_tester.cpp | 2 +- src/training/cntraining.cpp | 4 +--- src/training/common/commontraining.cpp | 27 +++----------------------- src/training/common/commontraining.h | 5 +---- src/training/mftraining.cpp | 2 +- src/training/shapeclustering.cpp | 2 +- unittest/mastertrainer_test.cc | 5 ++--- 7 files changed, 10 insertions(+), 37 deletions(-) diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp index e1a6e32fee..3233e43017 100644 --- a/src/training/classifier_tester.cpp +++ b/src/training/classifier_tester.cpp @@ -102,7 +102,7 @@ int main(int argc, char **argv) { tesseract::CheckSharedLibraryVersion(); ParseArguments(&argc, &argv); std::string file_prefix; - auto trainer = tesseract::LoadTrainingData(argc, argv, false, nullptr, file_prefix); + auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix); tesseract::TessBaseAPI *api; // Decode the classifier string. tesseract::ShapeClassifier *shape_classifier = diff --git a/src/training/cntraining.cpp b/src/training/cntraining.cpp index a020ac16e5..2079e52984 100644 --- a/src/training/cntraining.cpp +++ b/src/training/cntraining.cpp @@ -106,7 +106,6 @@ int main(int argc, char *argv[]) { // Set the global Config parameters before parsing the command line. 
Config = CNConfig; - const char *PageName; LIST CharList = NIL_LIST; CLUSTERER *Clusterer = nullptr; LIST ProtoList = NIL_LIST; @@ -118,8 +117,7 @@ int main(int argc, char *argv[]) { ParseArguments(&argc, &argv); int num_fonts = 0; - int tessoptind = 1; - while ((PageName = GetNextFilename(argc, argv, tessoptind)) != nullptr) { + for (const char *PageName = *++argv; PageName != nullptr; PageName = *++argv) { printf("Reading %s ...\n", PageName); FILE *TrainingPage = fopen(PageName, "rb"); ASSERT_HOST(TrainingPage); diff --git a/src/training/common/commontraining.cpp b/src/training/common/commontraining.cpp index 431eb0ee06..67d3992338 100644 --- a/src/training/common/commontraining.cpp +++ b/src/training/common/commontraining.cpp @@ -197,7 +197,7 @@ void WriteShapeTable(const std::string &file_prefix, const ShapeTable &shape_tab * If shape_table is not nullptr, but failed to load, make a fake flat one, * as shape clustering was not run. */ -std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *argv, bool replication, +std::unique_ptr<MasterTrainer> LoadTrainingData(const char *const *filelist, bool replication, ShapeTable **shape_table, std::string &file_prefix) { InitFeatureDefs(&feature_defs); InitIntegerFX(); @@ -236,10 +236,8 @@ std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *arg } } trainer->SetFeatureSpace(fs); - const char *page_name; - // Load training data from .tr files on the command line. - int tessoptind = 1; - while ((page_name = GetNextFilename(argc, argv, tessoptind)) != nullptr) { + // Load training data from .tr files in filelist (terminated by nullptr). 
+ for (const char *page_name = *filelist++; page_name != nullptr; page_name = *filelist++) { tprintf("Reading %s ...\n", page_name); trainer->ReadTrainingSamples(page_name, feature_defs, false); @@ -291,25 +289,6 @@ std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *arg return trainer; } -/*---------------------------------------------------------------------------*/ -/** - * This routine returns the next command line argument. If - * there are no remaining command line arguments, it returns - * nullptr. This routine should only be called after all option - * arguments have been parsed and removed with ParseArguments. - * - * Globals: - * - tessoptind defined by tessopt sys call - * @return Next command line argument or nullptr. - */ -const char *GetNextFilename(int argc, const char *const *argv, int &tessoptind) { - if (tessoptind < argc) { - return argv[tessoptind++]; - } else { - return nullptr; - } -} /* GetNextFilename */ - /*---------------------------------------------------------------------------*/ /** * This routine searches through a list of labeled lists to find diff --git a/src/training/common/commontraining.h b/src/training/common/commontraining.h index 98b6e3437d..f7c1bfe0b1 100644 --- a/src/training/common/commontraining.h +++ b/src/training/common/commontraining.h @@ -121,12 +121,9 @@ void WriteShapeTable(const std::string &file_prefix, const ShapeTable &shape_tab // If shape_table is not nullptr, but failed to load, make a fake flat one, // as shape clustering was not run. 
TESS_COMMON_TRAINING_API -std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *argv, bool replication, +std::unique_ptr<MasterTrainer> LoadTrainingData(const char *const *filelist, bool replication, ShapeTable **shape_table, std::string &file_prefix); -TESS_COMMON_TRAINING_API -const char *GetNextFilename(int argc, const char *const *argv, int &tessoptind); - LABELEDLIST FindList(tesseract::LIST List, const std::string &Label); TESS_COMMON_TRAINING_API diff --git a/src/training/mftraining.cpp b/src/training/mftraining.cpp index d0975c3707..4f8c18e278 100644 --- a/src/training/mftraining.cpp +++ b/src/training/mftraining.cpp @@ -198,7 +198,7 @@ int main(int argc, char **argv) { ShapeTable *shape_table = nullptr; std::string file_prefix; // Load the training data. - auto trainer = tesseract::LoadTrainingData(argc, argv, false, &shape_table, file_prefix); + auto trainer = tesseract::LoadTrainingData(argv + 1, false, &shape_table, file_prefix); if (trainer == nullptr) { return 1; // Failed. } diff --git a/src/training/shapeclustering.cpp b/src/training/shapeclustering.cpp index 242688bb1d..eb95b1237c 100644 --- a/src/training/shapeclustering.cpp +++ b/src/training/shapeclustering.cpp @@ -47,7 +47,7 @@ int main(int argc, char **argv) { ParseArguments(&argc, &argv); std::string file_prefix; - auto trainer = tesseract::LoadTrainingData(argc, argv, false, nullptr, file_prefix); + auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix); if (!trainer) { return 1; diff --git a/unittest/mastertrainer_test.cc b/unittest/mastertrainer_test.cc index dcd64af1e1..b6dcc5bb5a 100644 --- a/unittest/mastertrainer_test.cc +++ b/unittest/mastertrainer_test.cc @@ -173,12 +173,11 @@ class MasterTrainerTest : public testing::Test { FLAGS_X = TestDataNameToPath("eng.xheights").c_str(); FLAGS_U = TestDataNameToPath("eng.unicharset").c_str(); std::string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr")); - const char *argv[] = {tr_file_name.c_str()}; - int argc = 1; + 
const char *filelist[] = {tr_file_name.c_str(), nullptr}; std::string file_prefix; delete shape_table_; shape_table_ = nullptr; - master_trainer_ = LoadTrainingData(argc, argv, false, &shape_table_, file_prefix); + master_trainer_ = LoadTrainingData(filelist, false, &shape_table_, file_prefix); EXPECT_TRUE(master_trainer_ != nullptr); EXPECT_TRUE(shape_table_ != nullptr); }