Skip to content

Commit

Permalink
Various fixes, including memory leak in fixspace, font labels on output, removed some annoying debug output, fixes to initialization of parameters, general cleanup, and added Hindi
Browse files Browse the repository at this point in the history

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@566 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith committed Mar 21, 2011
1 parent 96ca745 commit c81483f
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 23 deletions.
34 changes: 23 additions & 11 deletions api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ TessBaseAPI::TessBaseAPI()
language_(NULL),
last_oem_requested_(OEM_DEFAULT),
recognition_done_(false),
truth_cb_(NULL),
rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
image_width_(0), image_height_(0) {
}
Expand Down Expand Up @@ -132,11 +133,6 @@ bool TessBaseAPI::SetVariable(const char* name, const char* value) {
return ParamUtils::SetParam(name, value, false, tesseract_->params());
}

// Sets the parameter called name to value via ParamUtils::SetParam, passing
// true as the third argument (SetVariable passes false in the same position).
// A Tesseract instance is created on demand if none exists yet.
// Returns whatever ParamUtils::SetParam returns.
bool TessBaseAPI::SetVariableIfInit(const char* name, const char* value) {
  // Lazily construct the engine so there is a parameter set to write into.
  if (tesseract_ == NULL) {
    tesseract_ = new Tesseract;
  }
  const bool was_set =
      ParamUtils::SetParam(name, value, true, tesseract_->params());
  return was_set;
}

bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
IntParam *p = ParamUtils::FindParam<IntParam>(
name, GlobalParams()->int_params, tesseract_->params()->int_params);
Expand Down Expand Up @@ -185,7 +181,9 @@ void TessBaseAPI::PrintVariables(FILE *fp) const {
// Returns 0 on success and -1 on initialization failure.
int TessBaseAPI::Init(const char* datapath, const char* language,
OcrEngineMode oem, char **configs, int configs_size,
bool configs_init_only) {
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params) {
// If the datapath, OcrEngineMode or the language have changed - start again.
// Note that the language_ field stores the last requested language that was
// initialized successfully, while tesseract_->lang stores the language
Expand All @@ -206,7 +204,8 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
tesseract_ = new Tesseract;
if (tesseract_->init_tesseract(
datapath, output_file_ != NULL ? output_file_->string() : NULL,
language, oem, configs, configs_size, configs_init_only) != 0) {
language, oem, configs, configs_size, vars_vec, vars_values,
set_only_init_params) != 0) {
return -1;
}
}
Expand Down Expand Up @@ -305,6 +304,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
if (tesseract_ == NULL)
return;
tesseract_->ResetAdaptiveClassifier();
tesseract_->getDict().ResetDocumentDictionary();
}

// Provide an image for Tesseract to recognize. Format is as
Expand Down Expand Up @@ -542,6 +542,8 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
tesseract_->CorrectClassifyWords(page_res_);
return 0;
}
if (truth_cb_ != NULL) truth_cb_->Run(image_height_, page_res_);

if (tesseract_->interactive_mode) {
tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
// The page_res is invalid after an interactive session, so cleanup
Expand Down Expand Up @@ -1391,6 +1393,8 @@ int TessBaseAPI::FindLines() {
return -1;
}

tesseract_->PrepareForPageseg();

Tesseract* osd_tess = osd_tesseract_;
OSResults osr;
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
Expand All @@ -1400,7 +1404,7 @@ int TessBaseAPI::FindLines() {
osd_tesseract_ = new Tesseract;
if (osd_tesseract_->init_tesseract(
datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
NULL, 0, false) == 0) {
NULL, 0, NULL, NULL, false) == 0) {
osd_tess = osd_tesseract_;
} else {
tprintf("Warning: Auto orientation and script detection requested,"
Expand All @@ -1413,6 +1417,16 @@ int TessBaseAPI::FindLines() {

if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
return -1;
// If OCR is to be run using Tesseract, OCR-able blobs are required for
// training, or interactive mode is needed, prepare data and images for ocr.
if (tesseract_->interactive_mode ||
tesseract_->tessedit_train_from_boxes ||
tesseract_->tessedit_ambigs_training ||
tesseract_->tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
tesseract_->tessedit_ocr_engine_mode ==
OEM_TESSERACT_CUBE_COMBINED) {
tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
}
return 0;
}

Expand Down Expand Up @@ -1686,7 +1700,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
}

if (blob->outlines)
tesseract_->AdaptToChar(blob, id, threshold);
tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold);
delete blob;
}

Expand Down Expand Up @@ -1867,7 +1881,6 @@ ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
int num_max_matches,
int* unichar_ids,
char* configs,
float* ratings,
int* num_matches_returned) {
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
Expand All @@ -1881,7 +1894,6 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
choices_it.forward()) {
BLOB_CHOICE* choice = choices_it.data();
unichar_ids[index] = choice->unichar_id();
configs[index] = choice->config();
ratings[index] = choice->rating();
++index;
}
Expand Down
27 changes: 17 additions & 10 deletions api/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "apitypes.h"
#include "genericvector.h"
#include "thresholder.h"
#include "unichar.h"
#include "tesscallback.h"

class PAGE_RES;
class PAGE_RES_IT;
Expand Down Expand Up @@ -71,11 +73,12 @@ class Trie;

typedef int (Dict::*DictFunc)(void* void_dawg_args,
UNICHAR_ID unichar_id, bool word_end);
typedef double (Dict::*ProbabilityInContextFunc)(const char* context,
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
const char* context,
int context_bytes,
const char* character,
int character_bytes);

typedef TessCallback2<int, PAGE_RES *> TruthCallback;

/**
* Base class for all tesseract APIs.
Expand Down Expand Up @@ -115,11 +118,10 @@ class TESSDLL_API TessBaseAPI {
* defaults on End().
* TODO(rays) Add a command-line option to dump the parameters to stdout
* and add a pointer to it in the FAQ
*
* Note: Must be called after Init().
*/
bool SetVariable(const char* name, const char* value);
// Same as above, but the parameter is set only if it is one of the "init"
// parameters (defined with *_INIT_* macro).
bool SetVariableIfInit(const char *name, const char *value);

// Returns true if the parameter was found among Tesseract parameters.
// Fills in value with the value of the parameter.
Expand Down Expand Up @@ -161,12 +163,15 @@ class TESSDLL_API TessBaseAPI {
* to be set before Init.
*/
int Init(const char* datapath, const char* language, OcrEngineMode mode,
char **configs, int configs_size, bool configs_init_only);
char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
return Init(datapath, language, oem, NULL, 0, false);
return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
}
int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, NULL, 0, false);
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
}

/**
Expand Down Expand Up @@ -507,7 +512,6 @@ class TESSDLL_API TessBaseAPI {
void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
int num_max_matches,
int* unichar_ids,
char* configs,
float* ratings,
int* num_matches_returned);

Expand Down Expand Up @@ -540,6 +544,8 @@ class TESSDLL_API TessBaseAPI {
return tesseract_;
}

// Registers cb as the truth callback by storing the pointer in truth_cb_.
// NOTE(review): nothing visible here takes ownership of or deletes cb --
// confirm who is responsible for its lifetime.
void InitTruthCallback(TruthCallback *cb) {
  truth_cb_ = cb;
}

// Return a pointer to underlying CubeRecoContext object if present.
CubeRecoContext *GetCubeRecoContext() const;

Expand Down Expand Up @@ -635,7 +641,8 @@ class TESSDLL_API TessBaseAPI {
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback *truth_cb_; ///< Callback for setting truth_* in WERD_RES.

/**
* @defgroup ThresholderParams
Expand Down
46 changes: 46 additions & 0 deletions api/pageiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,52 @@ bool PageIterator::Baseline(PageIteratorLevel level,
return true;
}

// Reports layout properties of the block the iterator currently points to.
// All four output pointers are written unconditionally:
//   orientation:       which way "up" points, derived from the block's
//                      classify_rotation() and re_rotation().
//   writing_direction: top-to-bottom for vertical text, otherwise
//                      right-to-left or left-to-right per the block.
//   textline_order:    order in which textlines follow each other.
//   deskew_angle:      negated angle of the block's skew vector.
void PageIterator::Orientation(tesseract::Orientation *orientation,
                               tesseract::WritingDirection *writing_direction,
                               tesseract::TextlineOrder *textline_order,
                               float *deskew_angle) {
  BLOCK* cur_block = it_->block()->block;

  // Orientation: take the unit "up" vector, undo the classify rotation and
  // apply the re-rotation, then classify the result by its components.
  FCOORD page_up(0.0, 1.0);
  page_up.unrotate(cur_block->classify_rotation());
  page_up.rotate(cur_block->re_rotation());

  if (page_up.x() > 0.0F) {
    *orientation = ORIENTATION_PAGE_RIGHT;
  } else if (page_up.x() == 0.0F) {
    *orientation = (page_up.y() > 0.0F) ? ORIENTATION_PAGE_UP
                                        : ORIENTATION_PAGE_DOWN;
  } else {
    *orientation = ORIENTATION_PAGE_LEFT;
  }

  // Writing direction: a classify rotation with zero x component is treated
  // as vertical text.
  const bool vertical = (cur_block->classify_rotation().x() == 0.0);
  const bool rtl = cur_block->right_to_left();
  if (vertical) {
    *writing_direction = WRITING_DIRECTION_TOP_TO_BOTTOM;
  } else if (rtl) {
    *writing_direction = WRITING_DIRECTION_RIGHT_TO_LEFT;
  } else {
    *writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
  }

  // Textline order: horizontal text always runs top-to-bottom; vertical text
  // runs right-to-left unless flagged as Mongolian.
  const bool mongolian = false;  // TODO(eger): fix me
  if (!vertical) {
    *textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM;
  } else if (mongolian) {
    *textline_order = TEXTLINE_ORDER_LEFT_TO_RIGHT;
  } else {
    *textline_order = TEXTLINE_ORDER_RIGHT_TO_LEFT;
  }

  // Deskew angle: the block's skew vector is the true horizontal for
  // textlines; report the negation of its angle.
  FCOORD textline_horizontal(cur_block->skew());
  *deskew_angle = -textline_horizontal.angle();
}

// Sets up the internal data for iterating the blobs of a new word, then
// moves the iterator to the given offset.
void PageIterator::BeginWord(int offset) {
Expand Down
11 changes: 11 additions & 0 deletions api/pageiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ class PageIterator {
bool Baseline(PageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2) const;

// Returns orientation for the block the iterator points to.
// orientation, writing_direction, textline_order: see publictypes.h
// deskew_angle: after rotating the block so the text orientation is
// upright, how many radians does one have to rotate the
// block anti-clockwise for it to be level?
// -Pi/4 <= deskew_angle <= Pi/4
// All four output pointers are written unconditionally, so none of them may
// be NULL. The implementation dereferences the iterator's current block
// without checking it, so the iterator must be positioned on a valid block.
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle);

protected:
// Sets up the internal data for iterating the blobs of a new word, then
// moves the iterator to the given offset.
Expand Down
2 changes: 1 addition & 1 deletion api/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ const char* ResultIterator::WordFontAttributes(bool* is_bold,
int* pointsize,
int* font_id) const {
if (it_->word() == NULL) return NULL; // Already at the end!
*font_id = it_->word()->font1;
*font_id = it_->word()->fontinfo_id;
if (*font_id < 0) return NULL; // No font available.
const UnicityTable<FontInfo> &font_table = tesseract_->get_fontinfo_table();
FontInfo font_info = font_table.get(*font_id);
Expand Down
2 changes: 1 addition & 1 deletion api/tesseractmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ int main(int argc, char **argv) {
api.SetOutputName(output);
api.SetPageSegMode(pagesegmode);
api.Init(argv[0], lang, tesseract::OEM_DEFAULT,
&(argv[arg]), argc - arg, false);
&(argv[arg]), argc - arg, NULL, NULL, false);

tprintf(_("Tesseract Open Source OCR Engine v%s with Leptonica\n"),
tesseract::TessBaseAPI::Version());
Expand Down

0 comments on commit c81483f

Please sign in to comment.