Skip to content

Commit

Permalink
Fixed bugs in scanutils that were causing accuracy degradation
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1084 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith@gmail.com committed Apr 28, 2014
1 parent cda8e74 commit d748d94
Showing 1 changed file with 40 additions and 33 deletions.
73 changes: 40 additions & 33 deletions ccutil/scanutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
// limitations under the License.

#include <ctype.h>
#include <math.h>
#include <stdarg.h>
#include <stddef.h>
#include <string.h>
Expand Down Expand Up @@ -89,16 +90,16 @@ TestBit(unsigned long *bitmap, unsigned int bit) {
return static_cast<int>(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1;
}

static inline int DigitValue(int ch) {
static inline int DigitValue(int ch, int base) {
if (ch >= '0' && ch <= '9') {
return ch-'0';
} else if (ch >= 'A' && ch <= 'Z') {
if (base >= 10 || ch <= '7')
return ch-'0';
} else if (ch >= 'A' && ch <= 'Z' && base == 16) {
return ch-'A'+10;
} else if (ch >= 'a' && ch <= 'z') {
} else if (ch >= 'a' && ch <= 'z' && base == 16) {
return ch-'a'+10;
} else {
return -1;
}
return -1;
}

// IO (re-)implementations -----------------------------------------------------
Expand All @@ -109,7 +110,7 @@ uintmax_t streamtoumax(FILE* s, int base) {

for (c = fgetc(s);
isspace(static_cast<unsigned char>(c)) && (c != EOF);
c = fgetc(s))
c = fgetc(s)) {}

// Single optional + or -
if (c == '-' || c == '+') {
Expand All @@ -136,7 +137,7 @@ uintmax_t streamtoumax(FILE* s, int base) {
}

// Actual number parsing
for (; (c != EOF) && (d = DigitValue(c)) >= 0 && d < base; c = fgetc(s))
for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s))
v = v*base + d;

ungetc(c, s);
Expand All @@ -161,19 +162,31 @@ double streamtofloat(FILE* s) {
}

// Actual number parsing
for (; (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s))
for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s))
v = v*10 + d;
if (c == '.') {
for (c = fgetc(s); (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s)) {
for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
w = w*10 + d;
k *= 10;
}
} else if (c == 'e' || c == 'E')
tprintf("WARNING: Scientific Notation not supported!");

ungetc(c, s);
}
double f = static_cast<double>(v)
+ static_cast<double>(w) / static_cast<double>(k);
if (c == 'e' || c == 'E') {
c = fgetc(s);
int expsign = 1;
if (c == '-' || c == '+') {
expsign = (c == '-') ? -1 : 1;
c = fgetc(s);
}
int exponent = 0;
for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
exponent = exponent * 10 + d;
}
exponent *= expsign;
f *= pow(10.0, static_cast<double>(exponent));
}
ungetc(c, s);

return minus ? -f : f;
}
Expand All @@ -194,14 +207,15 @@ double strtofloat(const char* s) {
}

// Actual number parsing
for (; *s && (d = DigitValue(*s)) >= 0; s++)
for (; *s && (d = DigitValue(*s, 10)) >= 0; s++)
v = v*10 + d;
if (*s == '.') {
for (++s; *s && (d = DigitValue(*s)) >= 0; s++) {
for (++s; *s && (d = DigitValue(*s, 10)) >= 0; s++) {
w = w*10 + d;
k *= 10;
}
} else if (*s == 'e' || *s == 'E')
}
if (*s == 'e' || *s == 'E')
tprintf("WARNING: Scientific Notation not supported!");

double f = static_cast<double>(v)
Expand Down Expand Up @@ -294,22 +308,15 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) {
break;

case ST_FLAGS:
switch (ch) {
case '*':
flags |= FL_SPLAT;
break;

case 0: case 1: case 2: case 3: case 4:
case 5: case 6: case 7: case 8: case 9:
width = (ch-'0');
state = ST_WIDTH;
flags |= FL_WIDTH;
break;

default:
state = ST_MODIFIERS;
p--; // Process this character again
break;
if (ch == '*') {
flags |= FL_SPLAT;
} else if ('0' <= ch && ch <= '9') {
width = (ch-'0');
state = ST_WIDTH;
flags |= FL_WIDTH;
} else {
state = ST_MODIFIERS;
p--; // Process this character again
}
break;

Expand Down

0 comments on commit d748d94

Please sign in to comment.