Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for text angle/gradient to be retrieved #4070

Merged
merged 6 commits into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/tesseract/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,11 @@ class TESS_API TessBaseAPI {
*/
Pix *GetThresholdedImage();

/**
* Return average gradient of lines on page.
*
* The value is read from the underlying Tesseract instance, which stores
* it while the page is being segmented into text lines. That instance
* initialises the value to 0.0f and resets it to 0.0f when it is cleared,
* so 0.0f is reported until a page has been segmented.
* NOTE(review): the unit of the value (slope vs. angle) is not documented
* here - confirm against the row-making code before relying on it.
*/
float GetGradient();

/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
Expand Down
1 change: 1 addition & 0 deletions include/tesseract/capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);

TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API float TessBaseAPIGetGradient(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
Expand Down
7 changes: 7 additions & 0 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2201,6 +2201,13 @@ int TessBaseAPI::FindLines() {
return 0;
}

/**
 * Return average gradient of lines on page.
 *
 * The value is the one most recently stored by the underlying Tesseract
 * instance during page segmentation.
 *
 * @return the stored gradient, or 0.0f when no Tesseract instance exists.
 *         0.0f is also what a freshly constructed or cleared instance
 *         reports, so callers see the same "no data" value in both cases.
 */
float TessBaseAPI::GetGradient() {
  // tesseract_ can be null (ClearResults() guards against that as well),
  // so do not dereference it unconditionally.
  if (tesseract_ == nullptr) {
    return 0.0f;
  }
  return tesseract_->gradient();
}

/** Delete the pageres and clear the block list ready for a new page. */
void TessBaseAPI::ClearResults() {
if (tesseract_ != nullptr) {
Expand Down
4 changes: 4 additions & 0 deletions src/api/capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,10 @@ struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle) {
return handle->GetThresholdedImage();
}

/** C binding: forwards to TessBaseAPI::GetGradient() on the given handle. */
float TessBaseAPIGetGradient(TessBaseAPI *handle) {
  const float page_gradient = handle->GetGradient();
  return page_gradient;
}

void TessBaseAPIClearPersistentCache(TessBaseAPI * /*handle*/) {
TessBaseAPI::ClearPersistentCache();
}
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/pagesegmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
bool cjk_mode = textord_use_cjk_fp_model;

textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_,
pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks);
pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks, &gradient_);
return auto_page_seg_ret_val;
}

Expand Down
2 changes: 2 additions & 0 deletions src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ Tesseract::Tesseract()
, scaled_factor_(-1)
, deskew_(1.0f, 0.0f)
, reskew_(1.0f, 0.0f)
, gradient_(0.0f)
, most_recently_used_(this)
, font_table_size_(0)
#ifndef DISABLED_LEGACY_ENGINE
Expand Down Expand Up @@ -495,6 +496,7 @@ void Tesseract::Clear() {
scaled_color_.destroy();
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
gradient_ = 0.0f;
splitter_.Clear();
scaled_factor_ = -1;
for (auto &sub_lang : sub_langs_) {
Expand Down
4 changes: 4 additions & 0 deletions src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ class TESS_API Tesseract : public Wordrec {
// Returns a read-only reference to the reskew_ vector held by this
// instance. The constructor and Clear() both set it to (1.0f, 0.0f), and
// SegmentPage passes it on to Textord::TextordPage.
const FCOORD &reskew() const {
return reskew_;
}
// Returns the gradient value most recently written through the pointer
// that SegmentPage hands to Textord::TextordPage. The value is 0.0f after
// construction and after Clear().
float gradient() const {
return gradient_;
}
// Destroy any existing pix and return a pointer to the pointer.
Image *mutable_pix_binary() {
pix_binary_.destroy();
Expand Down Expand Up @@ -1001,6 +1004,7 @@ class TESS_API Tesseract : public Wordrec {
int scaled_factor_;
FCOORD deskew_;
FCOORD reskew_;
float gradient_;
amitdo marked this conversation as resolved.
Show resolved Hide resolved
TesseractStats stats_;
// Sub-languages to be tried in addition to this.
std::vector<Tesseract *> sub_langs_;
Expand Down
11 changes: 5 additions & 6 deletions src/textord/textord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ Textord::Textord(CCStruct *ccstruct)
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
TO_BLOCK_LIST *to_blocks, float *gradient) {
page_tr_.set_x(width);
page_tr_.set_y(height);
if (to_blocks->empty()) {
Expand Down Expand Up @@ -219,15 +219,14 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
TO_BLOCK_IT to_block_it(to_blocks);
TO_BLOCK *to_block = to_block_it.data();
// Make the rows in the block.
float gradient;
// Do it the old fashioned way.
if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
gradient = make_rows(page_tr_, to_blocks);
*gradient = make_rows(page_tr_, to_blocks);
} else if (!PSM_SPARSE(pageseg_mode)) {
// RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
*gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
} else {
gradient = 0.0f;
*gradient = 0.0f;
}
BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
baseline_detector.ComputeStraightBaselines(use_box_bottoms);
Expand All @@ -236,7 +235,7 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
// Now make the words in the lines.
if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(this, page_tr_, gradient, blocks, to_blocks);
make_words(this, page_tr_, *gradient, blocks, to_blocks);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines
Expand Down
3 changes: 2 additions & 1 deletion src/textord/textord.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ class Textord {
// to the appropriate word(s) in case they are really diacritics.
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
float *gradient);

// If we were supposed to return only a single textline, and there is more
// than one, clean up and leave only the best.
Expand Down