Skip to content

Commit

Permalink
Devstudio changes for v2.00.
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@81 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith committed Jul 18, 2007
1 parent 47cd8b5 commit f4baca2
Show file tree
Hide file tree
Showing 19 changed files with 14,121 additions and 51 deletions.
7 changes: 7 additions & 0 deletions ccutil/ocrclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,13 @@ typedef struct /*bitmap strip */

typedef struct /*single character */
{
// It should be noted that the format for char_code for version 2.0 and beyond is UTF8
// which means that ASCII characters will come out as one structure but other characters
// will be returned in two or more instances of this structure with a single byte of the
// UTF8 code in each, but each will have the same bounding box.
// Programs which want to handle languages with different character sets will need to
// handle extended characters appropriately, but *all* code needs to be prepared to
// receive UTF8 coded characters for characters such as bullet and fancy quotes.
UINT16 char_code; /*character itself */
INT16 left; /*of char (-1) */
INT16 right; /*of char (-1) */
Expand Down
6 changes: 3 additions & 3 deletions classify/mfoutline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,9 @@ LIST ConvertOutlines(TESSLINE *Outline,
if (OutlineType == outer)
ConvertedOutlines = ConvertOutlines (Outline->child,
ConvertedOutlines, hole);
else
ConvertedOutlines = ConvertOutlines (Outline->child,
ConvertedOutlines, outer);
else
ConvertedOutlines = ConvertOutlines (Outline->child,
ConvertedOutlines, outer);

MFOutline = ConvertOutline (Outline);
ConvertedOutlines = push (ConvertedOutlines, MFOutline);
Expand Down
94 changes: 55 additions & 39 deletions dlltest/dlltest.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/**********************************************************************
* File: dlltest.cpp
* Description: Main program to test the tessdll interface.
* Author: Ray Smith
* Created: Wed May 16 15:17:46 PDT 2007
* Author: Ray Smith
* Created: Wed May 16 15:17:46 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -18,8 +18,8 @@
**********************************************************************/

#include "stdafx.h"
#include "imgs.h"
#include "tessdll.h"
#include "imgs.h"
#include "tessdll.h"

/**********************************************************************
* main()
Expand All @@ -28,46 +28,62 @@

int main(int argc, char **argv) {
if (argc != 3) {
fprintf(stderr, "Usage:%s imagename outputname\n", argv[0]);
fprintf(stderr, "Usage:%s imagename outputname\n", argv[0]);
exit(1);
}


IMAGE image;
if (image.read_header(argv[1]) < 0) {
fprintf(stderr, "Can't open %s\n", argv[1]);
exit(1);
fprintf(stderr, "Can't open %s\n", argv[1]);
exit(1);
}
if (image.read(image.get_ysize ()) < 0) {
fprintf(stderr, "Can't read %s\n", argv[1]);
exit(1);
}
if (image.get_bpp() != 1) {
fprintf(stderr, "Image is not binary!\n");
exit(1);
}

TessDllAPI api("eng");
api.BeginPageUpright(image.get_xsize(), image.get_ysize(), image.get_buffer());
ETEXT_DESC* output = api.Recognize_all_Words();

FILE* fp = fopen(argv[2],"w");
if (fp == NULL) {
fprintf(stderr, "Can't create %s\n", argv[2]);
exit(1);
}

for (int i = 0; i < output->count; ++i) {
const EANYCODE_CHAR* ch = &output->text[i];
for (int b = 0; b < ch->blanks; ++b)
fprintf(fp, "\n");
fprintf(fp, "%C[%x](%d,%d)->(%d,%d)\n",
ch->char_code, ch->char_code,
ch->left, ch->bottom, ch->right, ch->top);
if (ch->formatting & 64)
fprintf(fp, "<nl>\n\n");
if (ch->formatting & 128)
fprintf(fp, "<para>\n\n");
}

return 0;
fprintf(stderr, "Can't read %s\n", argv[1]);
exit(1);
}



TessDllAPI api("eng");



api.BeginPageUpright(image.get_xsize(), image.get_ysize(), image.get_buffer(),
image.get_bpp());

ETEXT_DESC* output = api.Recognize_all_Words();




FILE* fp = fopen(argv[2],"w");
if (fp == NULL) {
fprintf(stderr, "Can't create %s\n", argv[2]);
exit(1);
}

for (int i = 0; i < output->count; ++i) {
// It should be noted that the format for char_code for version 2.0 and beyond is UTF8
// which means that ASCII characters will come out as one structure but other characters
// will be returned in two or more instances of this structure with a single byte of the
// UTF8 code in each, but each will have the same bounding box.
// Programs which want to handle languages with different character sets will need to
// handle extended characters appropriately, but *all* code needs to be prepared to
// receive UTF8 coded characters for characters such as bullet and fancy quotes.
const EANYCODE_CHAR* ch = &output->text[i];
for (int b = 0; b < ch->blanks; ++b)
fprintf(fp, "\n");
fprintf(fp, "%C[%x](%d,%d)->(%d,%d)\n",
ch->char_code, ch->char_code,
ch->left, ch->bottom, ch->right, ch->top);
if (ch->formatting & 64)
fprintf(fp, "<nl>\n\n");
if (ch->formatting & 128)
fprintf(fp, "<para>\n\n");
}

fclose(fp);

return 0;
}
Loading

0 comments on commit f4baca2

Please sign in to comment.