Skip to content

Commit

Permalink
Allow arbitrary configuration options to be set from the command line…
Browse files Browse the repository at this point in the history
… (fix issue 893)

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@837 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
zdenop@gmail.com committed Apr 29, 2013
1 parent 1032cb1 commit 7dcfd02
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
26 changes: 23 additions & 3 deletions api/tesseractmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ int main(int argc, char **argv) {
} else if (strcmp(argv[arg], "--print-parameters") == 0) {
noocr = true;
print_parameters = true;
} else if (strcmp(argv[arg], "-o") == 0 && arg + 1 < argc) {
// handled properly after api init
++arg;
} else if (image == NULL) {
image = argv[arg];
} else if (output == NULL) {
Expand All @@ -105,7 +108,8 @@ int main(int argc, char **argv) {

if (output == NULL && noocr == false) {
fprintf(stderr, _("Usage:%s imagename outputbase|stdout [-l lang] "
"[-psm pagesegmode] [configfile...]\n\n"), argv[0]);
"[-psm pagesegmode] [-o configvar=value] "
"[configfile...]\n\n"), argv[0]);
fprintf(stderr,
_("pagesegmode values are:\n"
"0 = Orientation and script detection (OSD) only.\n"
Expand All @@ -119,8 +123,9 @@ int main(int argc, char **argv) {
"8 = Treat the image as a single word.\n"
"9 = Treat the image as a single word in a circle.\n"
"10 = Treat the image as a single character.\n"));
fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any"
"configfile.\n\n"));
fprintf(stderr, _("multiple -o arguments are allowed.\n"));
// Usage note on argument ordering. The two adjacent literals are joined by
// the compiler, so the first must end with a space to keep "occur" and
// "before" from running together in the printed message.
fprintf(stderr, _("-l lang, -psm pagesegmode and any -o options must occur "
"before any configfile.\n\n"));
fprintf(stderr, _("Single options:\n"));
fprintf(stderr, _(" -v --version: version info\n"));
fprintf(stderr, _(" --list-langs: list available languages for tesseract "
Expand All @@ -143,6 +148,21 @@ int main(int argc, char **argv) {
exit(1);
}

// Apply every "-o configvar=value" pair given on the command line.
// opt1 receives the variable name (text before the first '='), opt2 the
// value (text after it, truncated to fit the buffer).
char opt1[255], opt2[255];
for (arg = 0; arg < argc; arg++) {
  if (strcmp(argv[arg], "-o") == 0 && arg + 1 < argc) {
    const char *assignment = argv[arg + 1];
    ++arg;  // the value argument is consumed even when it is malformed

    // Locate the separator in the original argument so that a missing '='
    // is reported instead of dereferencing a NULL pointer.
    const char *separator = strchr(assignment, '=');
    if (separator == NULL) {
      fprintf(stderr, _("Missing '=' in option: %s\n"), assignment);
      continue;
    }

    // Reject names that do not fit rather than silently truncating them,
    // which would address a different (or nonexistent) variable.
    const size_t name_length = separator - assignment;
    if (name_length >= sizeof(opt1)) {
      fprintf(stderr, _("Option name too long: %s\n"), assignment);
      continue;
    }
    strncpy(opt1, assignment, name_length);
    opt1[name_length] = 0;

    // strncpy does not terminate on truncation, so terminate explicitly.
    strncpy(opt2, separator + 1, sizeof(opt2) - 1);
    opt2[sizeof(opt2) - 1] = 0;

    if (!api.SetVariable(opt1, opt2)) {
      fprintf(stderr, _("Could not set option: %s=%s\n"), opt1, opt2);
    }
  }
}

if (list_langs) {
GenericVector<STRING> languages;
api.GetAvailableLanguagesAsVector(&languages);
Expand Down
11 changes: 8 additions & 3 deletions doc/tesseract.1
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
tesseract \- command\-line OCR engine
.SH "SYNOPSIS"
.sp
\fBtesseract\fR \fIimagename\fR \fIoutbase\fR [\fI\-l lang\fR] [\fI\-psm N\fR] [\fIconfigfile\fR \&...]
\fBtesseract\fR \fIimagename\fR \fIoutbase\fR|\fIstdout\fR [\fI\-l lang\fR] [\fI\-psm N\fR] [\fI\-o configvar=value\fR] [\fIconfigfile\fR \&...]
.SH "DESCRIPTION"
.sp
tesseract(1) is a commercial quality OCR engine originally developed at HP between 1985 and 1995\&. In 1995, this engine was among the top 3 evaluated by UNLV\&. It was open\-sourced by HP and UNLV in 2005, and has been developed at Google since then\&.
Expand All @@ -45,7 +45,7 @@ The name of the input image\&. Most image file formats (anything readable by Lep
\fIoutbase\fR
.RS 4
The basename of the output file (to which the appropriate extension will be appended)\&. By default the output will be named
\fIoutbase\&.txt\fR\&.
\fIoutbase\&.txt\fR\&. When stdout is used as outbase, output will be sent to stdout\&.
.RE
.PP
\fI\-l lang\fR
Expand Down Expand Up @@ -80,6 +80,11 @@ are:
.\}
.RE
.PP
\fI\-o configvar=value\fR
.RS 4
Sets a configuration variable\&. Multiple options can be set by using \-o multiple times, once for each option\&.
.RE
.PP
\fI\-v\fR
.RS 4
Returns the current version of the tesseract(1) executable\&.
Expand All @@ -102,7 +107,7 @@ hocr \- Output in hOCR format instead of as a text file\&.
.RE
.RE
.sp
\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&.
\fBNota Bene:\fR The options \fI\-l lang\fR, \fI\-psm N\fR and \fI\-o configvar=value\fR must occur before any \fIconfigfile\fR\&.
.SH "LANGUAGES"
.sp
There are currently language packs available for the following languages:
Expand Down

0 comments on commit 7dcfd02

Please sign in to comment.