Skip to content

Commit

Permalink
more doxygen
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@445 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
joregan committed Jul 27, 2010
1 parent 7efbd3d commit b6e3cbe
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 85 deletions.
96 changes: 50 additions & 46 deletions ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,16 @@ inT16 blob_count(WERD *w) {
}


/**********************************************************************
/**
* recog_pseudo_word
*
* Make a word from the selected blobs and run Tess on them.
**********************************************************************/
*
* @param block_list blocks to check
* @param selection_box recognise blobs within this box
*/
namespace tesseract {
void Tesseract::recog_pseudo_word( // recognize blobs
BLOCK_LIST *block_list, // blocks to check
void Tesseract::recog_pseudo_word(BLOCK_LIST *block_list,
TBOX &selection_box) {
WERD *word;
ROW *pseudo_row; // row of word
Expand All @@ -191,16 +193,18 @@ void Tesseract::recog_pseudo_word( // recognize blobs
}


/**********************************************************************
/**
* recog_interactive
*
* Recognize a single word in interactive mode.
**********************************************************************/
BOOL8 Tesseract::recog_interactive( //recognize blobs
BLOCK *block, //block
ROW *row, //row of word
WERD *word //word to recognize
) {
*
* @param block block
* @param row row of word
* @param word word to recognise
*/
BOOL8 Tesseract::recog_interactive(BLOCK *block,
ROW *row,
WERD *word) {
WERD_RES word_res(word);
inT16 char_qual;
inT16 good_char_qual;
Expand All @@ -219,23 +223,23 @@ BOOL8 Tesseract::recog_interactive( //recognize blobs
}


/**********************************************************************
/**
* recog_all_words()
*
* Walk the current block list applying the specified word processor function
* to all words
**********************************************************************/
*
* @param page_res page structure
* @param monitor progress monitor
* @param target_word_box specifies just to extract a rectangle
* @param dopasses 0 - all passes, 1 - just pass 1, 2 - passes 2 and higher
*/

void Tesseract::recog_all_words( // process words
PAGE_RES *page_res, // page structure
// progress monitor
void Tesseract::recog_all_words(PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
// specifies just to extract a rectangle
TBOX *target_word_box,
//0 - all, 1 just pass 1, 2 passes 2 and higher
inT16 dopasses
) {
// reset page iterator
inT16 dopasses) {
// reset page iterator
static PAGE_RES_IT page_res_it;
inT16 chars_in_word;
inT16 rejects_in_word;
Expand Down Expand Up @@ -587,11 +591,11 @@ if (dopasses==1) return;
}


/**********************************************************************
/**
* classify_word_pass1
*
* Baseline normalize the word and pass it to Tess.
**********************************************************************/
*/

void Tesseract::classify_word_pass1( //recog one word
WERD_RES *word, //word to do
Expand Down Expand Up @@ -744,11 +748,11 @@ void Tesseract::classify_word_pass1( //recog one word
}
}

/**********************************************************************
/**
* classify_word_pass2
*
* Control what to do with the word in pass 2
**********************************************************************/
*/

void Tesseract::classify_word_pass2(WERD_RES *word, BLOCK* block, ROW *row) {
BOOL8 done_this_pass = FALSE;
Expand Down Expand Up @@ -937,11 +941,11 @@ void Tesseract::classify_word_pass2(WERD_RES *word, BLOCK* block, ROW *row) {
}


/**********************************************************************
/**
* match_word_pass2
*
* Baseline normalize the word and pass it to Tess.
**********************************************************************/
*/

void Tesseract::match_word_pass2( //recog one word
WERD_RES *word, //word to do
Expand Down Expand Up @@ -1053,14 +1057,14 @@ void Tesseract::match_word_pass2( //recog one word
} // namespace tesseract


/*************************************************************************
namespace tesseract {
/**
* fix_rep_char()
* The word is a repeated char. Find the repeated character. Make a reject
* string which rejects any char other than the voted char. Set the word to done
* to stop rematching it.
*
*************************************************************************/
namespace tesseract {
*/
void Tesseract::fix_rep_char(WERD_RES *word_res) {
struct REP_CH {
UNICHAR_ID unichar_id;
Expand Down Expand Up @@ -1127,11 +1131,11 @@ static int is_simple_quote(const char* signed_str, int length) {
*(str + 2) == 0x99)));
}

/**********************************************************************
/**
* fix_quotes
*
* Change pairs of quotes to double quotes.
**********************************************************************/
*/
void Tesseract::fix_quotes(WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
BLOB_CHOICE_LIST_CLIST *blob_choices) {
Expand Down Expand Up @@ -1176,12 +1180,12 @@ void Tesseract::fix_quotes(WERD_CHOICE *choice, //choice to fix
}


/**********************************************************************
/**
* fix_hyphens
*
* Change pairs of hyphens to a single hyphen if the bounding boxes touch.
* Typically a long dash which has been segmented.
**********************************************************************/
*/
void Tesseract::fix_hyphens( //crunch double hyphens
WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
Expand Down Expand Up @@ -1230,11 +1234,11 @@ void Tesseract::fix_hyphens( //crunch double hyphens
} // namespace tesseract


/**********************************************************************
/**
* merge_blobs
*
* Add the outlines from blob2 to blob1. Blob2 is emptied but not deleted.
**********************************************************************/
*/

void merge_blobs( //combine 2 blobs
PBLOB *blob1, //dest blob
Expand Down Expand Up @@ -1307,13 +1311,13 @@ void choice_dump_tester( //dump chars in word
}
*/

/*************************************************************************
/**
* make_bln_copy()
*
* Generate a baseline normalised copy of the source word. The copy is done so
* that whatever format the original word is in, a polygonal bln version is
* generated as output.
*************************************************************************/
*/

WERD *make_bln_copy(WERD *src_word, ROW *row, BLOCK* block,
float x_height, DENORM *denorm) {
Expand Down Expand Up @@ -1527,11 +1531,11 @@ BOOL8 check_debug_pt(WERD_RES *word, int location) {
}


/**********************************************************************
/**
* set_word_fonts
*
* Get the fonts for the word.
**********************************************************************/
*/
namespace tesseract {
void Tesseract::set_word_fonts(
WERD_RES *word, // word to adapt to
Expand Down Expand Up @@ -1625,11 +1629,11 @@ void Tesseract::set_word_fonts(
}


/**********************************************************************
/**
* font_recognition_pass
*
* Smooth the fonts for the document.
**********************************************************************/
*/

void Tesseract::font_recognition_pass( //good chars in word
PAGE_RES_IT &page_res_it) {
Expand Down Expand Up @@ -1753,11 +1757,11 @@ void Tesseract::font_recognition_pass( //good chars in word
} // namespace tesseract


/**********************************************************************
/**
* add_in_one_row
*
* Add into the stats for one row.
**********************************************************************/
*/
//dead code?
void add_in_one_row( //good chars in word
ROW_RES *row, //current row
Expand All @@ -1781,11 +1785,11 @@ void add_in_one_row( //good chars in word
}


/**********************************************************************
/**
* find_modal_font
*
* Find the modal font and remove from the stats.
**********************************************************************/
*/
//make static?
void find_modal_font( //good chars in word
STATS *fonts, //font stats
Expand Down
80 changes: 41 additions & 39 deletions ccmain/fixxht.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ EXTERN STRING_VAR (chs_bl,
"Baseline chars");
EXTERN STRING_VAR (chs_non_ambig_desc, "gq", "Reliable descender chars");

/*************************************************************************
/**
* re_estimate_x_ht()
*
* Walk the blobs in the word together with the text string and reject map.
Expand All @@ -97,43 +97,45 @@ EXTERN STRING_VAR (chs_non_ambig_desc, "gq", "Reliable descender chars");
*
* A) Try to re-estimate x-ht and caps ht from confirmed pts in word.
*
* FOR each non reject blob
* IF char is baseline posn ambiguous
* Remove ambiguity by comparing its posn with respect to baseline.
* IF char is a confirmed x-ht char
* Add x-ht posn to confirmed_x_ht pts for word
* IF char is a confirmed caps-ht char
* Add blob_ht to caps ht pts for word
*
* IF Std Dev of caps hts < 2 (AND # samples > 0)
* Use mean as caps ht estimate (Don't use median as we can expect a
* fair variation between the heights of the NON_AMBIG_CAPS_HT_CHS)
* IF Std Dev of caps hts >= 2 (AND # samples > 0)
* Suspect small caps font.
* Look for 2 clusters, each with Std Dev < 2.
* IF 2 clusters found
* Pick the smaller median as the caps ht estimate of the smallcaps.
*
* IF failed to estimate a caps ht
* Use the median caps ht if there is one,
* ELSE use the caps ht estimate of the previous word. NO!!!
*
*
* IF there are confirmed x-height chars
* Estimate confirmed x-height as the median value
* ELSE IF there is a confirmed caps ht
* Estimate confirmed x-height as a fraction of confirmed caps ht value
* ELSE
* Use the value for the previous word or the row value if this is the
* first word in the block. NO!!!
* @verbatim
FOR each non reject blob
IF char is baseline posn ambiguous
Remove ambiguity by comparing its posn with respect to baseline.
IF char is a confirmed x-ht char
Add x-ht posn to confirmed_x_ht pts for word
IF char is a confirmed caps-ht char
Add blob_ht to caps ht pts for word
IF Std Dev of caps hts < 2 (AND # samples > 0)
Use mean as caps ht estimate (Don't use median as we can expect a
fair variation between the heights of the NON_AMBIG_CAPS_HT_CHS)
IF Std Dev of caps hts >= 2 (AND # samples > 0)
Suspect small caps font.
Look for 2 clusters, each with Std Dev < 2.
IF 2 clusters found
Pick the smaller median as the caps ht estimate of the smallcaps.
IF failed to estimate a caps ht
Use the median caps ht if there is one,
ELSE use the caps ht estimate of the previous word. NO!!!
IF there are confirmed x-height chars
Estimate confirmed x-height as the median value
ELSE IF there is a confirmed caps ht
Estimate confirmed x-height as a fraction of confirmed caps ht value
ELSE
Use the value for the previous word or the row value if this is the
first word in the block. NO!!!
@endverbatim
*
* B) Add in case ambiguous blobs based on confirmed x-ht/caps ht, changing case
* as necessary. Reestimate caps ht and x-ht as in A, using the extended
* clusters.
*
* C) If word contains rejects, and x-ht estimate significantly differs from
* original estimate, return TRUE so that the word can be rematched
*************************************************************************/
*/

void re_estimate_x_ht( //improve for 1 word
WERD_RES *word_res, //word to do
Expand Down Expand Up @@ -496,12 +498,12 @@ void re_estimate_x_ht( //improve for 1 word
}


/*************************************************************************
namespace tesseract {
/**
* check_block_occ()
* Checks word for coarse block occupancy, rejecting more chars and flipping
* case of case ambiguous chars as required.
*************************************************************************/
namespace tesseract {
*/
void Tesseract::check_block_occ(WERD_RES *word_res) {
PBLOB_IT blob_it;
STRING new_string;
Expand Down Expand Up @@ -580,12 +582,12 @@ void Tesseract::check_block_occ(WERD_RES *word_res) {
}
} // namespace tesseract

/*************************************************************************
/**
* check_blob_occ()
*
* Checks blob for position relative to position above baseline
* Return 0 for reject, or (possibly case shifted) confirmed char
*************************************************************************/
* @return 0 for reject, or (possibly case shifted) confirmed char
*/

void check_blob_occ(char* proposed_char,
inT16 blob_ht_above_baseline,
Expand Down Expand Up @@ -804,12 +806,12 @@ void est_ambigs( //xht ambig ht stats
}


/*************************************************************************
/**
* dodgy_blob()
* Returns true if the blob has more than one outline, one above the other.
* These are dodgy as the top blob could be noise, causing the bounding box xht
* to be misleading
*************************************************************************/
*/

BOOL8 dodgy_blob(PBLOB *blob) {
OUTLINE_IT outline_it = blob->out_list ();
Expand Down

0 comments on commit b6e3cbe

Please sign in to comment.