Skip to content

Commit

Permalink
Harmless improvements from 3.00 going in to 2.04
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@209 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith committed Dec 30, 2008
1 parent de46248 commit 93c8e5d
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 103 deletions.
17 changes: 15 additions & 2 deletions ccutil/boxread.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/**********************************************************************
* File: boxread.cpp
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -23,12 +23,25 @@
#include "unichar.h"
#include "tprintf.h"

// Box files are used ONLY DURING TRAINING, by two separate processes:
// creating tr files with tesseract, and unicharset_extractor.
// read_next_box factors out the code that interprets one line of a box
// file, so that applybox and unicharset_extractor parse it identically.
// Fetches the next valid box file utf8 string and its coordinates and
// returns true, or returns false on eof (and closes the file).
// The utf8 file signature is ignored, utf-8 validity is checked, and
// fields may be separated by space or tab.
// utf8_str must be at least kBoxReadBufSize in length.
// If there are page numbers in the file, boxes from every page are read.
bool read_next_box(FILE* box_file, char* utf8_str,
                   int* x_min, int* y_min, int* x_max, int* y_max) {
  // -1 asks the page-aware overload to accept boxes from any page.
  const int any_page = -1;
  return read_next_box(any_page, box_file, utf8_str,
                       x_min, y_min, x_max, y_max);
}

// As read_next_box above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool read_next_box(int target_page, FILE* box_file, char* utf8_str,
int* x_min, int* y_min, int* x_max, int* y_max) {
static int line = 0;
Expand Down
7 changes: 3 additions & 4 deletions ccutil/boxread.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
*
**********************************************************************/

#ifndef THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
#define TESSERACT_CCUTIL_BOXREAD_H__

#include <stdio.h>

Expand All @@ -41,5 +41,4 @@ bool read_next_box(FILE* box_file, char* utf8_str,
bool read_next_box(int page, FILE* box_file, char* utf8_str,
int* x_min, int* y_min, int* x_max, int* y_max);

#endif // THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__

#endif // TESSERACT_CCUTIL_BOXREAD_H__
15 changes: 8 additions & 7 deletions ccutil/errcode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

const ERRCODE BADERRACTION = "Illegal error action";
#define MAX_MSG 1024
extern inT16 global_abort_code;

/**********************************************************************
* error
Expand Down Expand Up @@ -76,27 +75,29 @@ const char *format, ... //special message
//no specific
msgptr += sprintf (msgptr, "\n");

tprintf(msg);
if ((strstr (message, "File") != NULL) ||
fprintf(stderr, msg);
/*if ((strstr (message, "File") != NULL) ||
(strstr (message, "file") != NULL))
global_abort_code = FILE_ABORT;
else if ((strstr (message, "List") != NULL) ||
(strstr (message, "list") != NULL))
global_abort_code = LIST_ABORT;
else if ((strstr (message, "Memory") != NULL) ||
(strstr (message, "memory") != NULL))
global_abort_code = MEMORY_ABORT;
else
global_abort_code = NO_ABORT_CODE;
*/

int* p = NULL;
switch (action) {
case DBG:
case TESSLOG:
return; //report only
case EXIT:
err_exit();
//err_exit();
case ABORT:
abort();
// Create a deliberate segv as the stack trace is more useful that way.
if (!*p)
abort();
default:
BADERRACTION.error ("error", ABORT, NULL);
}
Expand Down
16 changes: 9 additions & 7 deletions ccutil/globaloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@
#include "errcode.h"
#include "tprintf.h"

inT16 global_loc_code = LOC_INIT;//location code
/*inT16 global_loc_code = LOC_INIT;//location code
inT16 global_subloc_code = SUBLOC_NORM;
//pass2 subloc code
inT16 global_subsubloc_code = SUBSUBLOC_OTHER;
//location code
inT16 global_abort_code = NO_ABORT_CODE;
//Prog abort code

*/
void signal_exit( //
int signal_code //Signal which
) {
int exit_status;
/*int exit_status;
if ((global_loc_code == LOC_PASS2) || (global_loc_code == LOC_FUZZY_SPACE))
global_loc_code += global_subloc_code + global_subsubloc_code;
Expand All @@ -49,7 +49,8 @@ void signal_exit( //
exit_status, global_loc_code, signal_code);
}
exit(exit_status);
exit(exit_status);*/
exit(signal_code);
}


Expand Down Expand Up @@ -95,19 +96,20 @@ void signal_termination_handler( //The real signal

//}; //end extern "C"


void set_global_loc_code(int loc_code) {
global_loc_code = loc_code;
// global_loc_code = loc_code;

}


void set_global_subloc_code(int loc_code) {
global_subloc_code = loc_code;
// global_subloc_code = loc_code;

}


void set_global_subsubloc_code(int loc_code) {
global_subsubloc_code = loc_code;
// global_subsubloc_code = loc_code;

}
6 changes: 6 additions & 0 deletions ccutil/host.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,10 @@ typedef unsigned char BOOL8;
#define NULL 0L
#endif

// Returns true if x is within tolerance of y, i.e. when both the
// difference x - y and its negation are no greater than tolerance.
template<class T> bool NearlyEqual(T x, T y, T tolerance) {
  T delta = x - y;
  // Reject when x is above y by more than the tolerance.
  if (!(delta <= tolerance))
    return false;
  // Accept unless y is above x by more than the tolerance.
  return -delta <= tolerance;
}

#endif
59 changes: 28 additions & 31 deletions ccutil/mainblk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ EXTERN DLLSYM STRING imagebasename;
EXTERN BOOL_VAR (m_print_variables, FALSE,
"Print initial values of all variables");
EXTERN STRING_VAR (m_data_sub_dir, "tessdata/", "Directory for data files");
EXTERN INT_VAR (memgrab_size, 0, "Preallocation size for batch use");
/*
EXTERN INT_VAR (memgrab_size, 0, "Preallocation size for batch use");*/


const ERRCODE NO_PATH =
"Warning:explicit path for executable will not be used for configs";
static const ERRCODE USAGE = "Usage";
Expand All @@ -51,7 +54,7 @@ static const ERRCODE USAGE = "Usage";
* Main for mithras demo program. Read the arguments and set up globals.
**********************************************************************/

void main_setup( /*main demo program */
void main_setup( /*main demo program */
const char *argv0, //program name
const char *basename, //name of image
int argc, /*argument count */
Expand All @@ -65,21 +68,29 @@ void main_setup( /*main demo program */

imagebasename = basename; /*name of image */

if(!getenv("TESSDATA_PREFIX")) {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
datadir = _XSTR(TESSDATA_PREFIX);
#undef _XSTR
#undef _STR
#else
if (getpath (argv0, datadir) < 0)
#ifdef __UNIX__
CANTOPENFILE.error ("main", ABORT, "%s to get path", argv[0]);
#else
NO_PATH.error ("main", DBG, NULL);
#endif
#endif
// TESSDATA_PREFIX Environment variable overrules everything.
// Compiled in -DTESSDATA_PREFIX is next.
// NULL goes to current directory.
// An actual value of argv0 is used if getpath is successful.
if (!getenv("TESSDATA_PREFIX")) {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
datadir = _XSTR(TESSDATA_PREFIX);
#undef _XSTR
#undef _STR
#else
if (argv0 != NULL) {
if (getpath(argv0, datadir) < 0)
#ifdef __UNIX__
CANTOPENFILE.error("main", ABORT, "%s to get path", argv0);
#else
NO_PATH.error("main", DBG, NULL);
#endif
} else {
datadir = "./";
}
#endif
} else {
datadir = getenv("TESSDATA_PREFIX");
}
Expand Down Expand Up @@ -114,18 +125,4 @@ void main_setup( /*main demo program */


datadir += m_data_sub_dir; /*data directory */

#ifdef __UNIX__
if (memgrab_size > 0) {
void *membuf; //test virtual mem
//test memory
membuf = malloc (memgrab_size);
if (membuf == NULL) {
raise(SIGTTOU); //hangup for jobber
sleep (10);
}
else
free(membuf);
}
#endif
}
3 changes: 2 additions & 1 deletion ccutil/memry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ DLLSYM char *alloc_string( //allocate string
}
return &string[1]; //string for user
#else
return static_cast<char*>(malloc(count));
// Round up the amount allocated to a multiple of 4
return static_cast<char*>(malloc((count + 3) & ~3));
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions ccutil/ndminx.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
#define NDMINX_H

#ifndef MAX
#define MAX(a,b) ( (a>b) ? a : b )
#define MAX(x,y) (((x) >= (y))?(x):(y))
#endif

#ifndef MIN
#define MIN(a,b) ( (a<b) ? a : b )
#define MIN(x,y) (((x) <= (y))?(x):(y))
#endif

#endif
11 changes: 8 additions & 3 deletions ccutil/unichar.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
//
///////////////////////////////////////////////////////////////////////

#ifndef THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#ifndef TESSERACT_CCUTIL_UNICHAR_H__
#define TESSERACT_CCUTIL_UNICHAR_H__

#include <memory.h>

Expand All @@ -29,6 +29,11 @@
// A UNICHAR_ID is the unique id of a unichar.
typedef int UNICHAR_ID;

// Sentinel id that marks an invalid or uninitialized unichar. Declared
// as UNICHAR_ID (rather than plain int) so that it carries the same
// type as the ids it is compared against.
static const UNICHAR_ID INVALID_UNICHAR_ID = -1;
// A special unichar string that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";

// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
multiple Unicode characters representing the NFKC expansion of a ligature
Expand Down Expand Up @@ -76,4 +81,4 @@ class UNICHAR {
char chars[UNICHAR_LEN];
};

#endif // THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#endif // TESSERACT_CCUTIL_UNICHAR_H__
6 changes: 3 additions & 3 deletions ccutil/unicharmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
//
///////////////////////////////////////////////////////////////////////

#ifndef THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#ifndef TESSERACT_CCUTIL_UNICHARMAP_H__
#define TESSERACT_CCUTIL_UNICHARMAP_H__

#include "unichar.h"

Expand Down Expand Up @@ -79,4 +79,4 @@ class UNICHARMAP {
UNICHARMAP_NODE* nodes;
};

#endif // THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#endif // TESSERACT_CCUTIL_UNICHARMAP_H__
2 changes: 1 addition & 1 deletion ccutil/varable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ DLLSYM BOOL8 read_variables_file(const char *file // name to read

if (!foundit) {
anyerr = TRUE; // had an error
tprintf("read_variables_file:variable not found: %s",
tprintf("read_variables_file:variable not found: %s\n",
line);
}
}
Expand Down
Loading

0 comments on commit 93c8e5d

Please sign in to comment.