Skip to content

Commit

Permalink
fix issue 813: implement input through stdin
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@936 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
zdenop@gmail.com committed Jan 7, 2014
1 parent ed28bae commit 9c25eda
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 10 deletions.
2 changes: 2 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
2013-09-20 v3.03
* Tesseract executable: support for output to stdout; limited support for
single-page images from stdin (especially on Windows)
* Added Renderer to API to allow document-level processing and output
of document formats, like hOCR, PDF.
* Major refactor of word-level recognition, beam search, eliminating dead code.
Expand Down
61 changes: 51 additions & 10 deletions api/tesseractmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@
#include "tprintf.h"
#include "openclwrapper.h"

#include <iostream>
#include <vector>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif // _WIN32

/**********************************************************************
* main()
*
Expand Down Expand Up @@ -119,7 +127,7 @@ int main(int argc, char **argv) {
}

if (output == NULL && noocr == false) {
fprintf(stderr, "Usage:\n %s imagename outputbase|stdout [options...] "
fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout [options...] "
"[configfile...]\n\n", argv[0]);

fprintf(stderr, "OCR options:\n");
Expand Down Expand Up @@ -215,14 +223,7 @@ int main(int argc, char **argv) {
// It would be simpler if we could set the value before Init,
// but that doesn't work.
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api.SetPageSegMode(pagesegmode);

FILE* fin = fopen(image, "rb");
if (fin == NULL) {
fprintf(stderr, "Cannot open input file: %s\n", image);
exit(2);
}
fclose(fin);
api.SetPageSegMode(pagesegmode);

tesseract::TessResultRenderer* renderer = NULL;
bool b;
Expand All @@ -234,7 +235,47 @@ int main(int argc, char **argv) {

if (renderer == NULL) renderer = new tesseract::TessTextRenderer();

if (!api.ProcessPages(image, NULL, 0, renderer)) {
bool stdInput = false;
if (!strcmp(image, "stdin") || !strcmp(image, "-"))
stdInput = true;

if (stdInput) {
char byt;
PIX *pixd = NULL;
std::vector<char> ch_data;
std::istream file(std::cin.rdbuf());

#ifdef WIN32
if (_setmode(_fileno(stdin), _O_BINARY) == -1)
tprintf("ERROR: cin to binary: %s", strerror(errno));
#endif // WIN32

while (file.get(byt)) {
ch_data.push_back(byt);
}
std::cin.ignore(std::cin.rdbuf()->in_avail() + 1);

size_t size = ch_data.size();
l_uint8 *data;
if ( (data = (l_uint8 *) malloc( size )) != NULL ) {
memcpy(data, &(ch_data)[0], size);
} else {
tprintf("Memory allocation error\n");
exit(1);
}

pixd = pixReadMem(data, size);
api.ProcessPage(pixd, 0, NULL, NULL, 0, renderer);
} else {
FILE* fin = fopen(image, "rb");
if (fin == NULL) {
fprintf(stderr, "Cannot open input file: %s\n", image);
exit(2);
}
fclose(fin);
}

if (!stdInput && !api.ProcessPages(image, NULL, 0, renderer)) {
fprintf(stderr, "Error during processing.\n");
} else {
for (tesseract::TessResultRenderer* r = renderer; r != NULL;
Expand Down

0 comments on commit 9c25eda

Please sign in to comment.