From 94d0f77f56bb9123c4c33c97125e76e7bdb73159 Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Fri, 1 Nov 2019 21:43:26 +0100
Subject: [PATCH] Don't create an empty lstmf file

If Tesseract cannot find text in the input image, it should not write an empty lstmf file.

This problem was reported in issue #2741.

Signed-off-by: Stefan Weil
---
 src/ccmain/linerec.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/ccmain/linerec.cpp b/src/ccmain/linerec.cpp
index 7c01590cc4..0f4599dd1b 100644
--- a/src/ccmain/linerec.cpp
+++ b/src/ccmain/linerec.cpp
@@ -61,6 +61,10 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
     return;
   }
   TrainFromBoxes(boxes, texts, block_list, &images);
+  if (images.NumPages() <= 0) {
+    tprintf("Failed to read pages from %s\n", input_imagename.c_str());
+    return;
+  }
   images.Shuffle();
   if (!images.SaveDocument(lstmf_name.c_str(), nullptr)) {
     tprintf("Failed to write training data to %s!\n", lstmf_name.c_str());