Skip to content

Commit

Permalink
Bunch of minor bug fixes/cleanups
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1106 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith@gmail.com committed May 21, 2014
1 parent 25a8c7b commit 9708041
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 47 deletions.
9 changes: 5 additions & 4 deletions ccmain/pageiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,15 +432,16 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
Pix* original_img,
int* left, int* top) const {
int right, bottom;
if (!BoundingBox(level, left, top, &right, &bottom))
return NULL;
Pix* pix = tesseract_->pix_grey();
if (pix == NULL)
if (original_img == NULL)
return GetBinaryImage(level);

// Expand the box.
Expand All @@ -449,7 +450,7 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
right = MIN(right + padding, rect_width_);
bottom = MIN(bottom + padding, rect_height_);
Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix* grey_pix = pixClipRectangle(pix, box, NULL);
Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
boxDestroy(&box);
if (level == RIL_BLOCK) {
Pix* mask = it_->block()->block->render_mask();
Expand All @@ -460,7 +461,7 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
pixDestroy(&mask);
pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
pixInvert(expanded_mask, expanded_mask);
pixSetMasked(grey_pix, expanded_mask, 255);
pixSetMasked(grey_pix, expanded_mask, MAX_UINT32);
pixDestroy(&expanded_mask);
}
return grey_pix;
Expand Down
5 changes: 3 additions & 2 deletions ccmain/pageiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class TESS_API PageIterator {
// ============= Moving around within the page ============.

/**
* Moves the iterator to point to the start of the page to begin an
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
Expand Down Expand Up @@ -234,9 +234,10 @@ class TESS_API PageIterator {
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* GetImage(PageIteratorLevel level, int padding,
Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img,
int* left, int* top) const;

/**
Expand Down
2 changes: 1 addition & 1 deletion ccutil/scanutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ uintmax_t streamtoumax(FILE* s, int base) {
} else if (base == 16) {
if (c == '0') {
c = fgetc(s);
if (c == 'x' && c == 'X') c = fgetc(s);
if (c == 'x' || c == 'X') c = fgetc(s);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cube/cube_search_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
int seg_cnt = end_pt - start_pt;
double prob_val = (1.0 / class_cnt) *
exp(-abs(seg_cnt - 2.0)) *
exp(-fabs(seg_cnt - 2.0)) *
exp(-samp->Width() / static_cast<double>(samp->Height()));

if (alt_list) {
Expand Down
8 changes: 4 additions & 4 deletions textord/cjkpitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ class FPRow {
box2.height() >= pitch * (1.0 + kFPTolerance)) return false;

const float real_pitch = box_pitch(box1, box2);
if (abs(real_pitch - pitch) < pitch * kFPTolerance) return true;
if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;

if (textord_space_size_is_variable) {
// Hangul characters usually have fixed pitch, but words are
Expand Down Expand Up @@ -631,9 +631,9 @@ void FPRow::EstimatePitch(bool pass1) {
// character may have a good pitch only between its successor.
// So we collect only pitch values between two good
// characters, and within tolerance in pass2.
if (pass1 ||
(prev_was_good &&
abs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
if (pass1 || (prev_was_good &&
fabs(estimated_pitch_ - pitch) <
kFPTolerance * estimated_pitch_)) {
good_pitches_.Add(pitch);
if (!is_box_modified(i - 1) && !is_box_modified(i)) {
good_gaps_.Add(gap);
Expand Down
10 changes: 5 additions & 5 deletions textord/makerow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2667,14 +2667,16 @@ void mark_repeated_chars(TO_ROW *row) {
if (!box_it.empty()) {
do {
BLOBNBOX* bblob = box_it.data();
int repeat_length = 0;
int repeat_length = 1;
if (bblob->flow() == BTFT_LEADER &&
!bblob->joined_to_prev() && bblob->cblob() != NULL) {
BLOBNBOX_IT test_it(box_it);
for (test_it.forward(); !test_it.at_first(); test_it.forward()) {
for (test_it.forward(); !test_it.at_first();) {
bblob = test_it.data();
if (bblob->flow() != BTFT_LEADER)
break;
test_it.forward();
bblob = test_it.data();
if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
repeat_length = 0;
break;
Expand All @@ -2688,11 +2690,9 @@ void mark_repeated_chars(TO_ROW *row) {
bblob = box_it.data();
bblob->set_repeated_set(num_repeated_sets);
}
if (!box_it.at_first())
bblob->set_repeated_set(0);
} else {
box_it.forward();
bblob->set_repeated_set(0);
box_it.forward();
}
} while (!box_it.at_first()); // until all done
}
Expand Down
52 changes: 26 additions & 26 deletions textord/tospace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,25 @@ void Textord::block_spacing_stats(
TO_BLOCK *block,
GAPMAP *gapmap,
BOOL8 &old_text_ord_proportional,
inT16 &block_space_gap_width, //resulting estimate
inT16 &block_non_space_gap_width //resulting estimate
inT16 &block_space_gap_width, // resulting estimate
inT16 &block_non_space_gap_width // resulting estimate
) {
TO_ROW_IT row_it; //row iterator
TO_ROW *row; //current row
BLOBNBOX_IT blob_it; //iterator
TO_ROW_IT row_it; // row iterator
TO_ROW *row; // current row
BLOBNBOX_IT blob_it; // iterator

STATS centre_to_centre_stats (0, MAXSPACING);
//DEBUG USE ONLY
// DEBUG USE ONLY
STATS all_gap_stats (0, MAXSPACING);
STATS space_gap_stats (0, MAXSPACING);
inT16 minwidth = MAX_INT16; //narrowest blob
inT16 minwidth = MAXSPACING; // narrowest blob
TBOX blob_box;
TBOX prev_blob_box;
inT16 centre_to_centre;
inT16 gap_width;
float real_space_threshold;
float iqr_centre_to_centre; //DEBUG USE ONLY
float iqr_all_gap_stats; //DEBUG USE ONLY
float iqr_centre_to_centre; // DEBUG USE ONLY
float iqr_all_gap_stats; // DEBUG USE ONLY
inT32 end_of_row;
inT32 row_length;

Expand Down Expand Up @@ -885,32 +885,32 @@ ROW *Textord::make_prop_words(
TO_ROW *row, // row to make
FCOORD rotation // for drawing
) {
BOOL8 bol; //start of line
BOOL8 bol; // start of line
/* prev_ values are for the start of the word being built; non-prev_ values
are for the gap between the word being built and the next one. */
BOOL8 prev_fuzzy_sp; //probably space
BOOL8 prev_fuzzy_non; //probably not
uinT8 prev_blanks; //in front of word
BOOL8 fuzzy_sp; //probably space
BOOL8 fuzzy_non; //probably not
uinT8 blanks; //in front of word
BOOL8 prev_fuzzy_sp; // probably space
BOOL8 prev_fuzzy_non; // probably not
uinT8 prev_blanks; // in front of word
BOOL8 fuzzy_sp = false; // probably space
BOOL8 fuzzy_non = false; // probably not
uinT8 blanks = 0; // in front of word
BOOL8 prev_gap_was_a_space = FALSE;
BOOL8 break_at_next_gap = FALSE;
ROW *real_row; //output row
ROW *real_row; // output row
C_OUTLINE_IT cout_it;
C_BLOB_LIST cblobs;
C_BLOB_IT cblob_it = &cblobs;
WERD_LIST words;
WERD_IT word_it; //new words
WERD *word; //new word
WERD_IT rep_char_it; //repeated char words
WERD_IT word_it; // new words
WERD *word; // new word
WERD_IT rep_char_it; // repeated char words
inT32 next_rep_char_word_right = MAX_INT32;
float repetition_spacing; //gap between repetitions
inT32 xstarts[2]; //row ends
inT32 prev_x; //end of prev blob
BLOBNBOX *bblob; //current blob
TBOX blob_box; //bounding box
BLOBNBOX_IT box_it; //iterator
float repetition_spacing; // gap between repetitions
inT32 xstarts[2]; // row ends
inT32 prev_x; // end of prev blob
BLOBNBOX *bblob; // current blob
TBOX blob_box; // bounding box
BLOBNBOX_IT box_it; // iterator
TBOX prev_blob_box;
TBOX next_blob_box;
inT16 prev_gap = MAX_INT16;
Expand Down
2 changes: 0 additions & 2 deletions training/pango_font_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
PangoLayout* layout;
{
// Pango is not releasing the cached layout.
#ifndef USE_STD_NAMESPACE
DISABLE_HEAP_LEAK_CHECK;
#endif
layout = pango_layout_new(context);
}
if (desc_) {
Expand Down
3 changes: 2 additions & 1 deletion training/stringrenderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ class StringRenderer {
int RenderToBinaryImage(const char* text, int text_length, int threshold,
Pix** pix);
// Renders a line of text with all available fonts that were able to render
// the text.
// at least min_coverage fraction of the input text. Use 1.0 to require that
// a font be able to render all the text.
int RenderAllFontsToImage(double min_coverage, const char* text,
int text_length, string* font_used, Pix** pix);

Expand Down
3 changes: 2 additions & 1 deletion training/text2image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ BOOL_PARAM_FLAG(render_per_font, true,
"Image filenames are of the form output_name.font_name.tif");
DOUBLE_PARAM_FLAG(min_coverage, 1.0,
"If find_fonts==true, the minimum coverage the font has of "
"the characters in the text file to include it, between 0 and 1.");
"the characters in the text file to include it, between "
"0 and 1.");

BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit.");

Expand Down

0 comments on commit 9708041

Please sign in to comment.