diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 1bb34fcdf9..89b9a311e4 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -738,54 +738,6 @@ class TESS_API TessBaseAPI { void GetBlockTextOrientations(int** block_orientation, bool** vertical_writing); -#ifndef DISABLED_LEGACY_ENGINE - - /** Sets Wordrec::fill_lattice_ function to point to the given function. */ - void SetFillLatticeFunc(FillLatticeFunc f); - - /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* FindLinesCreateBlockList(); - - /** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ - static void DeleteBlockList(BLOCK_LIST* block_list); - - /** Returns a ROW object created from the input row specification. */ - static ROW* MakeTessOCRRow(float baseline, float xheight, float descender, - float ascender); - - /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB* MakeTBLOB(Pix* pix); - - /** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. - */ - static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode); - - /** This method returns the features associated with the input image. */ - void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* feature_outline_index); - - /** - * This method returns the row to which a box of specified dimensions would - * belong. If no good match is found, it returns nullptr. - */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, - int bottom); - - /** - * Method to run adaptive classifier on a blob. - * It returns at max num_max_matches results. - */ - void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids, - float* ratings, int* num_matches_returned); -#endif // ndef DISABLED_LEGACY_ENGINE - /** This method returns the string form of the specified unichar. */ const char* GetUnichar(int unichar_id); @@ -848,40 +800,6 @@ class TESS_API TessBaseAPI { //// paragraphs.cpp //////////////////////////////////////////////////// TESS_LOCAL void DetectParagraphs(bool after_text_recognition); -#ifndef DISABLED_LEGACY_ENGINE - - /** @defgroup ocropusAddOns ocropus add-ons */ - /* @{ */ - - /** - * Adapt to recognize the current image as the given character. - * The image must be preloaded and be just an image of a single character. - */ - TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length, - float baseline, float xheight, - float descender, float ascender); - - /** Recognize text doing one pass only, using settings for a given pass. */ - TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); - - TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result); - - /** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ - TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths, - float** costs, int** x0, - int** y0, int** x1, int** y1, - PAGE_RES* page_res); - - TESS_LOCAL const PAGE_RES* GetPageRes() const { - return page_res_; - } - /* @} */ -#endif // ndef DISABLED_LEGACY_ENGINE - protected: Tesseract* tesseract_; ///< The underlying data object. Tesseract* osd_tesseract_; ///< For orientation & script detection. diff --git a/include/tesseract/capi.h b/include/tesseract/capi.h index 1198e33da8..3ee30927bf 100644 --- a/include/tesseract/capi.h +++ b/include/tesseract/capi.h @@ -524,40 +524,6 @@ TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor); TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline); -#ifndef DISABLED_LEGACY_ENGINE - -# ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, - TessFillLatticeFunc f); - -TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, - INT_FEATURE_STRUCT* int_features, - int* num_features, - int* FeatureOutlineIndex); - -TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); - -TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, - int num_max_matches, - int* unichar_ids, float* ratings, - int* num_matches_returned); - -TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender, - float ascender); - -TESS_API TBLOB* TessMakeTBLOB(Pix* pix); - -TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); - -TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); - -TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list); - -# endif // def TESS_CAPI_INCLUDE_BASEAPI - -#endif // ndef DISABLED_LEGACY_ENGINE - #ifdef __cplusplus } #endif diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 3fb29dea1e..9a7227de42 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2001,13 +2001,6 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { } } -#ifndef DISABLED_LEGACY_ENGINE -/** Sets Wordrec::fill_lattice_ function to point to the given function. */ -void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { - if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; -} -#endif // ndef DISABLED_LEGACY_ENGINE - /** Common code for setting the image. */ bool TessBaseAPI::InternalSetImage() { if (tesseract_ == nullptr) { @@ -2338,361 +2331,4 @@ STRING HOcrEscape(const char* text) { return ret; } - -#ifndef DISABLED_LEGACY_ENGINE - - -// ____________________________________________________________________________ -// Ocropus add-ons. - -/** Find lines from the image making the BLOCK_LIST. */ -BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { - ASSERT_HOST(FindLines() == 0); - BLOCK_LIST* result = block_list_; - block_list_ = nullptr; - return result; -} - -/** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ -void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; -} - - -ROW *TessBaseAPI::MakeTessOCRRow(float baseline, - float xheight, - float descender, - float ascender) { - int32_t xstarts[] = {-32000}; - double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); -} - -/** Creates a TBLOB* from the whole pix. */ -TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - BLOCK block("a character", true, 0, 0, 0, 0, width, height); - - // Create C_BLOBs from the page - extract_edges(pix, &block); - - // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); - C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; - // Move all the outlines to the first blob. - C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); - } - // Convert the first blob to the output TBLOB. - return TBLOB::PolygonalCopy(false, c_blob_it.data()); -} - -/** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. - */ -void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { - TBOX box = tblob->bounding_box(); - float x_center = (box.left() + box.right()) / 2.0f; - float baseline = row->base_line(x_center); - float scale = kBlnXHeight / row->x_height(); - tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); -} - -/** - * Return a TBLOB * from the whole pix. - * To be freed later with delete. - */ -static TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender, - bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); - - // Normalize TBLOB - ROW *row = - TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); - delete row; - return tblob; -} - -/** - * Adapt to recognize the current image as the given character. - * The image must be preloaded into pix_binary_ and be just an image - * of a single character. - */ -void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender) { - UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, - tesseract_->classify_bln_numeric_mode, - tesseract_->pix_binary()); - float threshold; - float best_rating = -100; - - - // Classify to get a raw choice. - BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, &choices); - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(&choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->rating() > best_rating) { - best_rating = choice_it.data()->rating(); - } - } - - threshold = tesseract_->matcher_good_threshold; - - if (blob->outlines) - tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, - tesseract_->AdaptedTemplates); - delete blob; -} - - -PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - auto *page_res = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); - return page_res; -} - -PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result) { - if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); - return pass1_result; -} - -struct TESS_CHAR : ELIST_LINK { - char *unicode_repr; - int length; // of unicode_repr - float cost; - TBOX box; - - TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { - length = (len == -1 ? strlen(repr) : len); - unicode_repr = new char[length + 1]; - strncpy(unicode_repr, repr, length); - } - - TESS_CHAR() - : unicode_repr(nullptr), - length(0), - cost(0.0f) - { // Satisfies ELISTIZE. - } - ~TESS_CHAR() { - delete [] unicode_repr; - } -}; - -ELISTIZEH(TESS_CHAR) -ELISTIZE(TESS_CHAR) - -static void add_space(TESS_CHAR_IT* it) { - auto *t = new TESS_CHAR(0, " "); - it->add_after_then_move(t); -} - - -static float rating_to_cost(float rating) { - rating = 100 + rating; - // cuddled that to save from coverage profiler - // (I have never seen ratings worse than -100, - // but the check won't hurt) - if (rating < 0) rating = 0; - return rating; -} - -/** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ -static void extract_result(TESS_CHAR_IT* out, - PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - int word_count = 0; - while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().c_str(); - const char *len = word->best_choice->unichar_lengths().c_str(); - TBOX real_rect = word->word->bounding_box(); - - if (word_count) - add_space(out); - int n = strlen(len); - for (int i = 0; i < n; i++) { - auto *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); - tc->box = real_rect.intersection(word->box_word->BlobBox(i)); - out->add_after_then_move(tc); - str += *len; - len++; - } - page_res_it.forward(); - word_count++; - } -} - -/** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ -int TessBaseAPI::TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res) { - TESS_CHAR_LIST tess_chars; - TESS_CHAR_IT tess_chars_it(&tess_chars); - extract_result(&tess_chars_it, page_res); - tess_chars_it.move_to_first(); - int n = tess_chars.length(); - int text_len = 0; - *lengths = new int[n]; - *costs = new float[n]; - *x0 = new int[n]; - *y0 = new int[n]; - *x1 = new int[n]; - *y1 = new int[n]; - int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); - text_len += (*lengths)[i] = tc->length; - (*costs)[i] = tc->cost; - (*x0)[i] = tc->box.left(); - (*y0)[i] = tc->box.bottom(); - (*x1)[i] = tc->box.right(); - (*y1)[i] = tc->box.top(); - } - char *p = *text = new char[text_len]; - - tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward()) { - TESS_CHAR *tc = tess_chars_it.data(); - strncpy(p, tc->unicode_repr, tc->length); - p += tc->length; - } - return n; -} - -/** This method returns the features associated with the input blob. */ -// The resulting features are returned in int_features, which must be -// of size MAX_NUM_INT_FEATURES. The number of features is returned in -// num_features (or 0 if there was a failure). -// On return feature_outline_index is filled with an index of the outline -// corresponding to each feature in int_features. -// TODO(rays) Fix the caller to out outline_counts instead. -void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, - INT_FEATURE_STRUCT* int_features, - int* num_features, - int* feature_outline_index) { - GenericVector outline_counts; - GenericVector bl_features; - GenericVector cn_features; - INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); - if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { - *num_features = 0; - return; // Feature extraction failed. - } - *num_features = cn_features.size(); - memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); - // TODO(rays) Pass outline_counts back and simplify the calling code. - if (feature_outline_index != nullptr) { - int f = 0; - for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; - } - } -} - -// This method returns the row to which a box of specified dimensions would -// belong. If no good match is found, it returns nullptr. -ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, - int left, int top, int right, int bottom) { - TBOX box(left, bottom, right, top); - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; - } - } - } - return nullptr; -} - -/** Method to run adaptive classifier on a blob. */ -void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned) { - auto* choices = new BLOB_CHOICE_LIST; - tesseract_->AdaptiveClassifier(blob, choices); - BLOB_CHOICE_IT choices_it(choices); - int& index = *num_matches_returned; - index = 0; - for (choices_it.mark_cycle_pt(); - !choices_it.cycled_list() && index < num_max_matches; - choices_it.forward()) { - BLOB_CHOICE* choice = choices_it.data(); - unichar_ids[index] = choice->unichar_id(); - ratings[index] = choice->rating(); - ++index; - } - *num_matches_returned = index; - delete choices; -} -#endif // ndef DISABLED_LEGACY_ENGINE - } // namespace tesseract. diff --git a/src/api/capi.cpp b/src/api/capi.cpp index b7ff4ce102..df26e7fdbe 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -41,12 +41,6 @@ void TessDeleteIntArray(const int* arr) { delete[] arr; } -#ifndef DISABLED_LEGACY_ENGINE -void TessDeleteBlockList(BLOCK_LIST* block_list) { - TessBaseAPI::DeleteBlockList(block_list); -} -#endif - TessResultRenderer* TessTextRendererCreate(const char* outputbase) { return new tesseract::TessTextRenderer(outputbase); @@ -597,25 +591,6 @@ BOOL TessBaseAPIDetectOrientationScript( return static_cast(success); } -void TessBaseAPIGetFeaturesForBlob( - TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex) { - handle->GetFeaturesForBlob(blob, int_features, num_features, - FeatureOutlineIndex); -} - -ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom) { - return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); -} - -void TessBaseAPIRunAdaptiveClassifier( - TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, - float* ratings, int* num_matches_returned) { - handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, - num_matches_returned); -} - #endif // ndef DISABLED_LEGACY_ENGINE const char* TessBaseAPIGetUnichar(TessBaseAPI* handle, @@ -632,22 +607,6 @@ int TessBaseAPINumDawgs(const TessBaseAPI* handle) { return handle->NumDawgs(); } -#ifndef DISABLED_LEGACY_ENGINE -ROW* TessMakeTessOCRRow(float baseline, float xheight, - float descender, float ascender) { - return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); -} - -TBLOB* TessMakeTBLOB(struct Pix* pix) { - return TessBaseAPI::MakeTBLOB(pix); -} - -void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, - BOOL numeric_mode) { - TessBaseAPI::NormalizeTBLOB(tblob, row, static_cast(numeric_mode)); -} -#endif // ndef DISABLED_LEGACY_ENGINE - TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle) { return handle->oem(); } @@ -667,13 +626,6 @@ void TessBaseGetBlockTextOrientations( handle->GetBlockTextOrientations(block_orientation, vertical_writing); } -#ifndef DISABLED_LEGACY_ENGINE -BLOCK_LIST* -TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) { - return handle->FindLinesCreateBlockList(); -} -#endif - void TessPageIteratorDelete(TessPageIterator* handle) { delete handle; }