Skip to content

Commit

Permalink
Added multi-page tiff capability
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@128 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith committed Feb 1, 2008
1 parent 7bb68d2 commit dd18aea
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 222 deletions.
30 changes: 24 additions & 6 deletions ccmain/applybox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,28 @@ what measures we are interested in.

#define EXTERN
EXTERN BOOL_VAR (applybox_rebalance, TRUE, "Drop dead");
EXTERN INT_VAR (applybox_debug, 0, "Debug level");
EXTERN INT_VAR (applybox_debug, 5, "Debug level");
EXTERN INT_VAR (applybox_page, 0, "Page number to apply boxes from");
EXTERN STRING_VAR (applybox_test_exclusions, "",
"Chars ignored for testing");
EXTERN double_VAR (applybox_error_band, 0.15, "Err band as fract of xht");

// The unicharset used during box training
static UNICHARSET unicharset_boxes;

// Debug helper: prints str followed by ':' and then, for each character of
// str, the value returned by UNICHAR::first_uni() in hex inside square
// brackets, finishing with a newline. The width of each character is taken
// from UNICHAR::utf8_step().
static void PrintString(const char* str) {
  tprintf("%s:", str);
  int step = 0;
  for (int i = 0; str[i]; i += step) {
    step = UNICHAR::utf8_step(str + i);
    // A step of 0 would never advance i; fall back to consuming one byte.
    // NOTE(review): presumably 0 signals an invalid UTF-8 lead byte - confirm.
    if (step == 0)
      step = 1;
    UNICHAR ch(str + i, step);
    tprintf("[%x]", ch.first_uni());
  }
  // The format has no conversion specifier, so no extra argument is passed.
  tprintf("\n");
}

/*************************************************************************
* The code re-assigns outlines to form words each with ONE labelled blob.
* Noise is left in UNLABELLED words. The chars on the page are checked crudely
Expand Down Expand Up @@ -132,7 +146,7 @@ void apply_boxes(BLOCK_LIST *block_list //real blocks
}

clear_any_old_text(block_list);
while (read_next_box (box_file, &box, &uch_id)) {
while (read_next_box(applybox_page, box_file, &box, &uch_id)) {
box_count++;
tgt_char_counts[uch_id]++;
row = find_row_of_box (block_list, box, block_id, row_id);
Expand Down Expand Up @@ -205,16 +219,17 @@ void clear_any_old_text( //remove correct text
}


BOOL8 read_next_box(FILE* box_file, //
BOOL8 read_next_box(int page,
FILE* box_file, //
BOX *box,
UNICHAR_ID *uch_id) {
int x_min;
int y_min;
int x_max;
int y_max;
char uch[kBufSize];
char uch[kBoxReadBufSize];

while (read_next_box(box_file, uch, &x_min, &y_min, &x_max, &y_max)) {
while (read_next_box(page, box_file, uch, &x_min, &y_min, &x_max, &y_max)) {
if (!unicharset_boxes.contains_unichar(uch))
{
unicharset_boxes.unichar_insert(uch);
Expand All @@ -225,6 +240,8 @@ BOOL8 read_next_box(FILE* box_file, //
exit(1);
}
}
// tprintf("Read box at (%d,%d), str:", x_min, y_min);
// PrintString(uch);
*uch_id = unicharset_boxes.unichar_to_id(uch);
*box = BOX (ICOORD (x_min, y_min), ICOORD (x_max, y_max));
return TRUE; //read a box ok
Expand Down Expand Up @@ -590,8 +607,9 @@ void tidy_up( //
if (tgt_char_counts[i] > labelled_char_counts[i]) {
if (labelled_char_counts[i] <= 1) {
tprintf
("APPLY_BOXES: FATALITY - %d labelled samples of \"%s\" - target is %d\n",
("APPLY_BOXES: FATALITY - %d labelled samples of \"%s\" - target is %d:\n",
labelled_char_counts[i], unicharset_boxes.id_to_unichar(i), tgt_char_counts[i]);
PrintString(unicharset_boxes.id_to_unichar(i));
}
else {
rebalance_needed = TRUE;
Expand Down
3 changes: 2 additions & 1 deletion ccmain/applybox.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void apply_boxes(BLOCK_LIST *block_list //real blocks
void clear_any_old_text( //remove correct text
BLOCK_LIST *block_list //real blocks
);
BOOL8 read_next_box(FILE* box_file, //
BOOL8 read_next_box(int page,
FILE* box_file, //
BOX *box,
UNICHAR_ID *uch_id);
ROW *find_row_of_box( //
Expand Down
126 changes: 8 additions & 118 deletions ccmain/tessedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,11 @@
*/
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
// Includes libtiff if HAVE_LIBTIFF is defined
#ifdef HAVE_LIBTIFF
#include "tiffio.h"
#endif
#endif

#ifdef GOOGLE3
#include "third_party/tiff/tiffio.h"
#endif

//extern "C" {
#include "callnet.h" //phils nn stuff
Expand All @@ -79,6 +75,10 @@
#define API_CONFIG "configs/api_config"
#define EXTERN

EXTERN STRING_VAR (tessedit_char_blacklist, "",
"Blacklist of chars not to recognize");
EXTERN STRING_VAR (tessedit_char_whitelist, "",
"Whitelist of chars to recognize");
EXTERN BOOL_EVAR (tessedit_write_vars, FALSE, "Write all vars to file");
EXTERN BOOL_VAR (tessedit_tweaking_tess_vars, FALSE,
"Fiddle tess config values");
Expand Down Expand Up @@ -153,6 +153,9 @@ int init_tesseract(const char *arg0,
cprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
exit(1);
}
// Set the white and blacklists (if any)
unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(),
tessedit_char_whitelist.string());

start_recog(configfile, textbase);

Expand Down Expand Up @@ -211,119 +214,6 @@ enum CMD_EVENTS
ACTION_2_CMD_EVENT
};

/**********************************************************************
* extend_menu()
*
* Function called by pgeditor to let you extend the command menu.
* Items can be added to the "MODES" and "OTHER" menus. The modes_id_base
* and other_id_base parameters are required to offset your command event ids
* from those of pgeditor, and to let pgeditor know which commands are mode
* changes and which are unmoded commands. (Sorry if you think these offsets
* are a bit kludgy, the alternative would be to duplicate all the menu
* constructor modes within pgeditor so that the offsets could be hidden.)
*
* Items for the "MODES" menu may only be simple menu items (just a name and
* id). Items for the "OTHER" menu can be editable parameters or boolean
* toggles. Refer to menu.h to see how to build different types.
**********************************************************************/

void extend_menu(RADIO_MENU *modes_menu,      //handle for "MODES"
                 INT16 modes_id_base,         //offset for mode cmd ids
                 NON_RADIO_MENU *other_menu,  //handle for "OTHER"
                 INT16 other_id_base          //offset for "OTHER" cmd ids
                ) {
  // Register the two extra modes. Each id is offset by modes_id_base so that
  // it does not clash with pgeditor's own command event ids.
  RADIO_MENU_LEAF *words_item =
    new RADIO_MENU_LEAF ("Recog Words", modes_id_base + RECOG_WERDS);
  modes_menu->add_child (words_item);

  RADIO_MENU_LEAF *blobs_item =
    new RADIO_MENU_LEAF ("Recog Blobs", modes_id_base + RECOG_PSEUDO);
  modes_menu->add_child (blobs_item);

  /* Nothing is currently added to the "OTHER" menu, so other_menu and
     other_id_base are unused. Templates for adding items to it:

     Example toggle
       other_menu->add_child(
         new TOGGLE_MENU_LEAF( "Action 2",            //Display string
           other_id_base + ACTION_2_CMD_EVENT,        //offset command id
           FALSE ) );                                 //Initial value

     Example text parm
       other_menu->add_child(
         new VARIABLE_MENU_LEAF( "Parm change",       //Display string
           other_id_base + ACTION_3_CMD_EVENT,        //offset command id
           "default value" ) );                       //default value string
  */
}


/**********************************************************************
* extend_moded_commands()
*
* Function called by pgeditor when the user is in one of the extended modes
* defined by extend_menu() and the user has selected an area in the image
* window.
**********************************************************************/

void extend_moded_commands(INT32 mode,         //current extended mode
                           BOX selection_box   //area selected
                          ) {
  if (mode == RECOG_WERDS) {
    command_window->msg ("Recogging selected words");
    // Hand recog_interactive to process_selected_words as the "word
    // processor" function for the words selected by selection_box.
    process_selected_words(current_block_list,
                           selection_box,
                           &recog_interactive);
    return;
  }

  if (mode == RECOG_PSEUDO) {
    command_window->msg ("Recogging selected blobs");
    // The selected area is passed to recog_pseudo_word.
    recog_pseudo_word(current_block_list, selection_box);
    return;
  }

  // Any other mode id is not one of ours: report it in the command window.
  char report[MAX_CHARS + 1];
  sprintf (report, "Unexpected extended mode " INT32FORMAT, mode);
  command_window->msg (report);
}


/**********************************************************************
* extend_unmoded_commands()
*
* Function called by pgeditor when the user has selected one of the unmoded
* extended menu options.
**********************************************************************/

void extend_unmoded_commands(INT32 cmd_event,  //extended command id
                             char *new_value   //changed value if any
                            ) {
  char msg[MAX_CHARS + 1];

  switch (cmd_event) {
    case ACTION_2_CMD_EVENT:     //a toggle event
      // The toggle state is read from the first character of new_value:
      // 'T' reports ON, anything else reports OFF.
      if (new_value[0] == 'T')
        command_window->msg ("Extended Action 2 ON!!");
      else
        command_window->msg ("Extended Action 2 OFF!!");
      break;
    default:
      // new_value is caller-supplied text of unknown length, so the write is
      // bounded by the buffer size; an over-long value is truncated instead
      // of overrunning msg.
      snprintf (msg, sizeof (msg),
                "Unrecognised extended command " INT32FORMAT " (%s)",
                cmd_event, new_value);
      command_window->msg (msg);
      break;
  }
}


/*************************************************************************
* set_tess_tweak_vars()
Expand Down
14 changes: 0 additions & 14 deletions ccmain/tessedit.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,5 @@ int init_tesseract(const char *arg0,
void recognize_page(STRING& image_name);
void end_tesseract();

// Called by pgeditor to let the command menu be extended with extra items
// on the "MODES" and "OTHER" menus.
void extend_menu(RADIO_MENU *modes_menu,
INT16 modes_id_base, //offset for mode cmd ids
NON_RADIO_MENU *other_menu, //handle for "OTHER"
INT16 other_id_base //offset for "OTHER" cmd ids
);
// Called by pgeditor when the user is in one of the extended modes and has
// selected an area in the image window. mode is the current extended mode.
void extend_moded_commands(INT32 mode,
BOX selection_box //area selected
);
// Called by pgeditor when the user has selected one of the unmoded extended
// menu options. cmd_event identifies the option selected.
void extend_unmoded_commands(INT32 cmd_event,
char *new_value //changed value if any
);
void set_tess_tweak_vars();
#endif
Loading

0 comments on commit dd18aea

Please sign in to comment.