From d748d94aae9c4f0c46c62b39b5038f07699c2318 Mon Sep 17 00:00:00 2001 From: "theraysmith@gmail.com" Date: Mon, 28 Apr 2014 23:06:41 +0000 Subject: [PATCH] Fixed bugs in scanutils that were causing accuracy degradation git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1084 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- ccutil/scanutils.cpp | 73 ++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/ccutil/scanutils.cpp b/ccutil/scanutils.cpp index 1ac98963fa..b1395a99b5 100644 --- a/ccutil/scanutils.cpp +++ b/ccutil/scanutils.cpp @@ -20,6 +20,7 @@ // limitations under the License. #include +#include #include #include #include @@ -89,16 +90,16 @@ TestBit(unsigned long *bitmap, unsigned int bit) { return static_cast(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1; } -static inline int DigitValue(int ch) { +static inline int DigitValue(int ch, int base) { if (ch >= '0' && ch <= '9') { - return ch-'0'; - } else if (ch >= 'A' && ch <= 'Z') { + if (base >= 10 || ch <= '7') + return ch-'0'; + } else if (ch >= 'A' && ch <= 'Z' && base == 16) { return ch-'A'+10; - } else if (ch >= 'a' && ch <= 'z') { + } else if (ch >= 'a' && ch <= 'z' && base == 16) { return ch-'a'+10; - } else { - return -1; } + return -1; } // IO (re-)implementations ----------------------------------------------------- @@ -109,7 +110,7 @@ uintmax_t streamtoumax(FILE* s, int base) { for (c = fgetc(s); isspace(static_cast(c)) && (c != EOF); - c = fgetc(s)) + c = fgetc(s)) {} // Single optional + or - if (c == '-' || c == '+') { @@ -136,7 +137,7 @@ uintmax_t streamtoumax(FILE* s, int base) { } // Actual number parsing - for (; (c != EOF) && (d = DigitValue(c)) >= 0 && d < base; c = fgetc(s)) + for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s)) v = v*base + d; ungetc(c, s); @@ -161,19 +162,31 @@ double streamtofloat(FILE* s) { } // Actual number parsing - for (; (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s)) + for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) v = v*10 + d; if (c == '.') { - for (c = fgetc(s); (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s)) { + for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) { w = w*10 + d; k *= 10; } - } else if (c == 'e' || c == 'E') - tprintf("WARNING: Scientific Notation not supported!"); - - ungetc(c, s); + } double f = static_cast(v) + static_cast(w) / static_cast(k); + if (c == 'e' || c == 'E') { + c = fgetc(s); + int expsign = 1; + if (c == '-' || c == '+') { + expsign = (c == '-') ? -1 : 1; + c = fgetc(s); + } + int exponent = 0; + for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) { + exponent = exponent * 10 + d; + } + exponent *= expsign; + f *= pow(10.0, static_cast(exponent)); + } + ungetc(c, s); return minus ? -f : f; } @@ -194,14 +207,15 @@ double strtofloat(const char* s) { } // Actual number parsing - for (; *s && (d = DigitValue(*s)) >= 0; s++) + for (; *s && (d = DigitValue(*s, 10)) >= 0; s++) v = v*10 + d; if (*s == '.') { - for (++s; *s && (d = DigitValue(*s)) >= 0; s++) { + for (++s; *s && (d = DigitValue(*s, 10)) >= 0; s++) { w = w*10 + d; k *= 10; } - } else if (*s == 'e' || *s == 'E') + } + if (*s == 'e' || *s == 'E') tprintf("WARNING: Scientific Notation not supported!"); double f = static_cast(v) @@ -294,22 +308,15 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) { break; case ST_FLAGS: - switch (ch) { - case '*': - flags |= FL_SPLAT; - break; - - case 0: case 1: case 2: case 3: case 4: - case 5: case 6: case 7: case 8: case 9: - width = (ch-'0'); - state = ST_WIDTH; - flags |= FL_WIDTH; - break; - - default: - state = ST_MODIFIERS; - p--; // Process this character again - break; + if (ch == '*') { + flags |= FL_SPLAT; + } else if ('0' <= ch && ch <= '9') { + width = (ch-'0'); + state = ST_WIDTH; + flags |= FL_WIDTH; + } else { + state = ST_MODIFIERS; + p--; // Process this character again } break;