Skip to content

Commit

Permalink
Preparations for unicodization
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@56 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith committed May 16, 2007
1 parent c7e9ec8 commit a59e5dc
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 37 deletions.
54 changes: 25 additions & 29 deletions dict/dawg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#ifdef __MSW32__
#include <windows.h>
#else
#include <netinet/in.h>
#endif
#include "dawg.h"
#include "cutil.h"
#include "callcpp.h"
Expand Down Expand Up @@ -91,18 +96,24 @@ INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node) {
}


/*
* Initialize letter_is_okay to point to the default implementation (a main
* program can override this).
*/
LETTER_OK_FUNC letter_is_okay = &def_letter_is_okay;

/**********************************************************************
* letter_is_okay
* def_letter_is_okay
*
* Check this letter in light of the current state. If everything is
* still OK then return TRUE;
* Default way to check this letter in light of the current state. If
* everything is still OK then return TRUE.
**********************************************************************/
INT32 letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end) {
INT32 def_letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end) {
EDGE_REF edge;
STRING dummy_word(word); // Auto-deleting string fixes memory leak.

Expand Down Expand Up @@ -267,7 +278,8 @@ void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node) {
*
* Read the DAWG in from a file
**********************************************************************/
void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges) {
void read_squished_dawg(const char *filename, EDGE_ARRAY dawg,
INT32 max_num_edges) {
FILE *file;
EDGE_REF edge;
INT32 num_edges = 0;
Expand All @@ -282,28 +294,12 @@ void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges) {
#else
file = open_file (filename, "rb");
#endif
fseek(file, 0, SEEK_END);
long fsize = ftell(file);
rewind(file);
fread (&num_edges, sizeof (int), 1, file);
// Auto-detect relative endianness of file and OS as future DAWG
// files may be little-endian.
long diff1 = sizeof(EDGE_RECORD)*num_edges + sizeof(int) - fsize;
reverse32(&num_edges);
long diff2 = sizeof(EDGE_RECORD)*num_edges + sizeof(int) - fsize;
reverse32(&num_edges);
// One of diff1 and diff2 should now be 0, but find the smallest
// just in case.
if (diff1 < 0) diff1 = -diff1;
if (diff2 < 0) diff2 = -diff2;
bool swap = diff2 < diff1;
if (swap)
reverse32(&num_edges);
num_edges = ntohl(num_edges);
fread (&dawg[0], sizeof (EDGE_RECORD), num_edges, file);
fclose(file);
if (swap)
for (edge=0;edge<num_edges;edge++)
reverse32(&dawg[edge]);
for (edge=0;edge<num_edges;edge++)
dawg[edge] = ntohl(dawg[edge]);

for (edge=0; edge<max_num_edges; edge++)
if (last_edge (dawg, edge)) node_count++;
Expand Down
26 changes: 18 additions & 8 deletions dict/dawg.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,24 +225,34 @@ EDGE_REF edge_char_of(EDGE_ARRAY dawg,
int character,
int word_end);

INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);

INT32 letter_is_okay(EDGE_ARRAY dawg,

INT32 def_letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end);

INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
/*
* Allow for externally provided letter_is_okay.
*/
typedef INT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, INT32, char, const char*,
INT32);
extern LETTER_OK_FUNC letter_is_okay;


INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);

void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);

void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges);
void read_squished_dawg(const char *filename, EDGE_ARRAY dawg,
INT32 max_num_edges);

INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);
INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);

INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);

/*
#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS
Expand All @@ -262,7 +272,7 @@ INT32 edges_in_node
_ARGS((EDGE_ARRAY dawg,
NODE_REF node));
INT32 letter_is_okay
INT32 def_letter_is_okay
_ARGS((EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
Expand Down

0 comments on commit a59e5dc

Please sign in to comment.