diff --git a/.gitignore b/.gitignore index 9081efccc9..cbf9cfeef9 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,8 @@ training/wordlist2dawg *.o *.Plo *.a +*.class +*.jar # tessdata *.cube.* diff --git a/COPYING b/COPYING index 096aaafb27..8d8d48cf91 100644 --- a/COPYING +++ b/COPYING @@ -1,5 +1,5 @@ This package contains the Tesseract Open Source OCR Engine. -Orignally developed at Hewlett Packard Laboratories Bristol and +Originally developed at Hewlett Packard Laboratories Bristol and at Hewlett Packard Co, Greeley Colorado, all the code in this distribution is now licensed under the Apache License: diff --git a/README.md b/README.md index 5eff4402f2..c555506280 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ find its data directory. You must either: ./autogen.sh ./configure make - make install + sudo make install sudo ldconfig to move the data files to the standard place, or: diff --git a/api/baseapi.cpp b/api/baseapi.cpp index bdc02bfe86..fa38d29001 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1660,7 +1660,7 @@ char* TessBaseAPI::GetUNLVText() { word->word->space() > 0 && !word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)) { - /* Write a space to separate from preceeding good text */ + /* Write a space to separate from preceding good text */ *ptr++ = ' '; last_char_was_tilde = false; } diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index e96f67c481..4f6afbe32e 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -178,7 +178,7 @@ void TessPDFRenderer::AppendPDFObject(const char *data) { AppendString((const char *)data); } -// Helper function to prevent us from accidentaly writing +// Helper function to prevent us from accidentally writing // scientific notation to an HOCR or PDF file. Besides, three // decimal points are all you really need. 
double prec(double x) { diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index e7abadf3d0..501b66c42c 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -227,7 +227,7 @@ int main(int argc, char **argv) { } // We have 2 possible sources of pagesegmode: a config file and - // the command line. For backwards compatability reasons, the + // the command line. For backwards compatibility reasons, the // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the // default for this program is tesseract::PSM_AUTO. We will let // the config file take priority, so the command-line default diff --git a/ccmain/control.cpp b/ccmain/control.cpp index d40c26329b..66a2a8bb3e 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -1556,7 +1556,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, word->fix_quotes(); if (tessedit_fix_hyphens) word->fix_hyphens(); - /* Dont trust fix_quotes! - though I think I've fixed the bug */ + /* Don't trust fix_quotes! - though I think I've fixed the bug */ if (word->best_choice->length() != word->box_word->length()) { tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" " #Blobs=%d\n", @@ -1694,7 +1694,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( goto not_a_word; /* Allow a single hyphen in a lower case word - - dont trust upper case - I've seen several cases of "H" -> "I-I" + - don't trust upper case - I've seen several cases of "H" -> "I-I" */ if (lengths[i] == 1 && s[offset] == '-') { hyphen_pos = i; diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index 6a7e6e67ef..327d7cbc55 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -129,7 +129,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) { int expected_outline_count; if (STRING (outlines_odd).contains (c)) - return 0; //Dont use this char + return 0; //Don't use this char else if (STRING (outlines_2).contains (c)) expected_outline_count = 2; else @@ -157,7 +157,7 @@ void 
Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, * - Word segmentation is the same as the original image * - All characters have the expected number of outlines * NOTE - the rejection counts are recalculated after unrejection - * - CANT do it in a single pass without a bit of fiddling + * - CAN'T do it in a single pass without a bit of fiddling * - keep it simple but inefficient *************************************************************************/ void Tesseract::unrej_good_quality_words( //unreject potential @@ -403,7 +403,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks /************************************************************************* * reject_whole_page() - * Dont believe any of it - set the reject map to 00..00 in all words + * Don't believe any of it - set the reject map to 00..00 in all words * *************************************************************************/ diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index 0a561ac9a0..e42617c053 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -55,7 +55,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, WERD_RES *word_res; WERD_RES_LIST fuzzy_space_words; inT16 new_length; - BOOL8 prevent_null_wd_fixsp; // DONT process blobless wds + BOOL8 prevent_null_wd_fixsp; // DON'T process blobless wds inT32 word_index; // current word block_res_it.set_to_list(&page_res->block_res_list); @@ -222,7 +222,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, * fuzzy spaces. The problem with the basic measure is that "561 63" would score * the same as "56163", though given our knowledge that the space is fuzzy, and * that there is a "1" next to the fuzzy space, we need to ensure that "56163" - * is prefered. + * is preferred. * * The solution is to NOT COUNT the score of any word which has a digit at one * end and a "1Il" as the character the other side of the space. 
@@ -272,8 +272,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { } else { /* Can we add the prev word score and potentially count this word? - Yes IF it didnt end in a 1 when the first char of this word is a digit - AND it didnt end in a digit when the first char of this word is a 1 + Yes IF it didn't end in a 1 when the first char of this word is a digit + AND it didn't end in a digit when the first char of this word is a 1 */ word_len = word->reject_map.length(); current_word_ok_so_far = FALSE; @@ -507,7 +507,7 @@ BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) { /* Use all the standard pass 2 conditions for mode 5 in set_done() in - reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT + reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T CARE WHETHER WE HAVE of/at on/an etc. */ if (fixsp_done_mode > 0 && diff --git a/ccmain/output.cpp b/ccmain/output.cpp index 42623b9ec8..ddfcfc54b6 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -297,7 +297,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? /************************************************************************* * SUSPECT LEVELS * - * 0 - dont reject ANYTHING + * 0 - don't reject ANYTHING * 1,2 - partial rejection * 3 - BEST * @@ -337,7 +337,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { rating_per_ch = word.rating() / word_res->reject_map.length(); if (rating_per_ch >= suspect_rating_per_ch) - return; //Dont touch bad ratings + return; //Don't touch bad ratings if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { /* Unreject any Tess Acceptable word - but NOT tess reject chs*/ diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp index b141bede62..7784f85361 100644 --- a/ccmain/paramsd.cpp +++ b/ccmain/paramsd.cpp @@ -329,13 +329,13 @@ void ParamsEditor::WriteParams(char *filename, fclose(fp); sprintf (msg_str, "Overwrite file " "%s" "? 
(Y/N)", filename); int a = sv_window_->ShowYesNoDialog(msg_str); - if (a == 'n') { return; } // dont write + if (a == 'n') { return; } // don't write } fp = fopen (filename, "wb"); // can we write to it? if (fp == NULL) { - sv_window_->AddMessage("Cant write to file " "%s" "", filename); + sv_window_->AddMessage("Can't write to file " "%s" "", filename); return; } diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index 607b84179c..aacc80dd6e 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -521,7 +521,7 @@ BOOL8 Tesseract::word_contains_non_1_digit(const char *word, /************************************************************************* * dont_allow_1Il() - * Dont unreject LONE accepted 1Il conflict set chars + * Don't unreject LONE accepted 1Il conflict set chars *************************************************************************/ void Tesseract::dont_allow_1Il(WERD_RES *word) { int i = 0; @@ -633,7 +633,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { next_left = 9999; else next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left(); - // Dont touch small or touching blobs - it is too dangerous. + // Don't touch small or touching blobs - it is too dangerous. 
if ((out_box.width() > 8 * word_res->denorm.x_scale()) && (out_box.left() > prev_right) && (out_box.right() < next_left)) { aspect_ratio = out_box.width() / (float) out_box.height(); diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index 0c52f0efd9..e348c93f98 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -136,7 +136,7 @@ Tesseract::Tesseract() BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces", this->params()), BOOL_MEMBER(tessedit_unrej_any_wd, false, - "Dont bother with word plausibility", this->params()), + "Don't bother with word plausibility", this->params()), BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?", this->params()), BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height", @@ -310,19 +310,19 @@ Tesseract::Tesseract() this->params()), INT_MEMBER(crunch_pot_indicators, 1, "How many potential indicators needed", this->params()), - BOOL_MEMBER(crunch_leave_ok_strings, true, "Dont touch sensible strings", + BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings", this->params()), BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring", this->params()), BOOL_MEMBER(crunch_leave_accept_strings, false, - "Dont pot crunch sensible strings", this->params()), + "Don't pot crunch sensible strings", this->params()), BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures", this->params()), INT_MEMBER(crunch_leave_lc_strings, 4, - "Dont crunch words with long lower case strings", + "Don't crunch words with long lower case strings", this->params()), INT_MEMBER(crunch_leave_uc_strings, 4, - "Dont crunch words with long lower case strings", + "Don't crunch words with long upper case strings", this->params()), INT_MEMBER(crunch_long_repetitions, 3, "Crunch words with long repetitions", this->params()), @@ -393,21 +393,21 @@ Tesseract::Tesseract() INT_MEMBER(suspect_space_level, 100, "Min suspect level for rejecting spaces",
this->params()), INT_MEMBER(suspect_short_words, 2, - "Dont Suspect dict wds longer than this", this->params()), + "Don't suspect dict wds longer than this", this->params()), BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected", this->params()), - double_MEMBER(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit", + double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit", this->params()), double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", this->params()), BOOL_MEMBER(tessedit_minimal_rejection, false, "Only reject tess failures", this->params()), - BOOL_MEMBER(tessedit_zero_rejection, false, "Dont reject ANYTHING", + BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING", this->params()), BOOL_MEMBER(tessedit_word_for_word, false, "Make output have exactly one word per WERD", this->params()), BOOL_MEMBER(tessedit_zero_kelvin_rejection, false, - "Dont reject ANYTHING AT ALL", this->params()), + "Don't reject ANYTHING AT ALL", this->params()), BOOL_MEMBER(tessedit_consistent_reps, true, "Force all rep chars the same", this->params()), INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm", @@ -424,7 +424,7 @@ Tesseract::Tesseract() "Use DOC dawg in 11l conf. detector", this->params()), BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test", this->params()), - BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Dont double check", + BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check", this->params()), BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control", this->params()), diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 50141bf942..6666dec36b 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -733,7 +733,7 @@ class Tesseract : public Wordrec { GenericVector* class_ids); // Resegments the word to achieve the target_text from the classifier. // Returns false if the re-segmentation fails. 
- // Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and + // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and // applies a full search on the classifier results to find the best classified // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity // substitutions ARE used. @@ -833,7 +833,7 @@ class Tesseract : public Wordrec { BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces"); BOOL_VAR_H(tessedit_unrej_any_wd, false, - "Dont bother with word plausibility"); + "Don't bother with word plausibility"); BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height"); BOOL_VAR_H(tessedit_enable_doc_dict, true, @@ -954,15 +954,15 @@ class Tesseract : public Wordrec { double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed"); - BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings"); + BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings"); BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring"); BOOL_VAR_H(crunch_leave_accept_strings, false, - "Dont pot crunch sensible strings"); + "Don't pot crunch sensible strings"); BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures"); INT_VAR_H(crunch_leave_lc_strings, 4, - "Dont crunch words with long lower case strings"); + "Don't crunch words with long lower case strings"); INT_VAR_H(crunch_leave_uc_strings, 4, - "Dont crunch words with long lower case strings"); + "Don't crunch words with long upper case strings"); INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); INT_VAR_H(crunch_debug, 0, "As it says"); INT_VAR_H(fixsp_non_noise_limit, 1, @@ -1010,16 +1010,16 @@ class Tesseract : public Wordrec { INT_VAR_H(suspect_space_level, 100,
"Min suspect level for rejecting spaces"); INT_VAR_H(suspect_short_words, 2, - "Dont Suspect dict wds longer than this"); + "Don't suspect dict wds longer than this"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); - double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit"); + double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures"); - BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING"); + BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING"); BOOL_VAR_H(tessedit_word_for_word, false, "Make output have exactly one word per WERD"); BOOL_VAR_H(tessedit_zero_kelvin_rejection, false, - "Dont reject ANYTHING AT ALL"); + "Don't reject ANYTHING AT ALL"); BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same"); INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); @@ -1030,7 +1030,7 @@ class Tesseract : public Wordrec { "Aspect ratio dot/hyphen test"); BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector"); BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); - BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check"); + BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check"); BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control"); BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control"); BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control"); diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp index 322171f0dc..280096b5d3 100644 --- a/ccstruct/blobbox.cpp +++ b/ccstruct/blobbox.cpp @@ -33,7 +33,7 @@ ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) -// Upto 30 degrees is allowed for rotations of diacritic blobs.
+// Up to 30 degrees is allowed for rotations of diacritic blobs. const double kCosSmallAngle = 0.866; // Min aspect ratio for a joined word to indicate an obvious flow direction. const double kDefiniteAspectRatio = 2.0; diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp index 947fcc02fe..f4aedca5b3 100644 --- a/ccstruct/boxread.cpp +++ b/ccstruct/boxread.cpp @@ -35,7 +35,7 @@ FILE* OpenBoxFile(const STRING& fname) { FILE* box_file = NULL; if (!(box_file = fopen(filename.string(), "rb"))) { CANTOPENFILE.error("read_next_box", TESSEXIT, - "Cant open box file %s", + "Can't open box file %s", filename.string()); } return box_file; diff --git a/ccstruct/normalis.cpp b/ccstruct/normalis.cpp index d43a1459cb..ddf6dbf3b1 100644 --- a/ccstruct/normalis.cpp +++ b/ccstruct/normalis.cpp @@ -382,7 +382,7 @@ void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const { } // Transforms the given coords all the way back to source image space using -// the full transformation sequence defined by this and its predecesors +// the full transformation sequence defined by this and its predecessors // recursively, shallowest first, and finally any block re_rotation. // If last_denorm is not NULL, then the last transformation used will // be last_denorm, and the block re_rotation will never be executed. diff --git a/ccstruct/normalis.h b/ccstruct/normalis.h index c8ce7cd28b..2d75412078 100644 --- a/ccstruct/normalis.h +++ b/ccstruct/normalis.h @@ -218,7 +218,7 @@ class DENORM { void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; // Transforms the given coords all the way back to source image space using - // the full transformation sequence defined by this and its predecesors + // the full transformation sequence defined by this and its predecessors // recursively, shallowest first, and finally any block re_rotation. 
// If last_denorm is not NULL, then the last transformation used will // be last_denorm, and the block re_rotation will never be executed. diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index 0dd0bf2ef8..b64eff36d0 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -108,7 +108,7 @@ class PDBLK PDBLK & operator= (const PDBLK & source); protected: - POLY_BLOCK *hand_poly; //< wierd as well + POLY_BLOCK *hand_poly; //< weird as well ICOORDELT_LIST leftside; //< left side vertices ICOORDELT_LIST rightside; //< right side vertices TBOX box; //< bounding box diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h index 4b27bab49b..d945dda1fa 100644 --- a/ccstruct/rejctmap.h +++ b/ccstruct/rejctmap.h @@ -16,7 +16,7 @@ ** limitations under the License. * -This module may look unneccessarily verbose, but here's the philosophy... +This module may look unnecessarily verbose, but here's the philosophy... ALL processing of the reject map is done in this module. There are lots of separate calls to set reject/accept flags. These have DELIBERATELY been kept @@ -51,7 +51,7 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! 
enum REJ_FLAGS { /* Reject modes which are NEVER overridden */ - R_TESS_FAILURE, // PERM Tess didnt classify + R_TESS_FAILURE, // PERM Tess didn't classify R_SMALL_XHT, // PERM Xht too small R_EDGE_CHAR, // PERM Too close to edge of image R_1IL_CONFLICT, // PERM 1Il confusion @@ -62,7 +62,7 @@ enum REJ_FLAGS /* Initial reject modes (pre NN_ACCEPT) */ R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) - R_NOT_TESS_ACCEPTED, // TEMP Tess didnt accept WERD + R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD @@ -82,7 +82,7 @@ enum REJ_FLAGS R_ROW_REJ, // TEMP Row rejection R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space - /* Accept modes which occur inbetween the above rejection groups */ + /* Accept modes which occur between the above rejection groups */ R_NN_ACCEPT, //NN acceptance R_HYPHEN_ACCEPT, //Hyphen acceptance R_MM_ACCEPT, //Matrix match acceptance diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp index 63676c2fca..39d5edd180 100644 --- a/ccstruct/statistc.cpp +++ b/ccstruct/statistc.cpp @@ -204,7 +204,7 @@ double STATS::ile(double frac) const { /********************************************************************** * STATS::min_bucket * - * Find REAL minimum bucket - ile(0.0) isnt necessarily correct + * Find REAL minimum bucket - ile(0.0) isn't necessarily correct **********************************************************************/ inT32 STATS::min_bucket() const { // Find min if (buckets_ == NULL || total_count_ == 0) { @@ -219,7 +219,7 @@ inT32 STATS::min_bucket() const { // Find min /********************************************************************** * STATS::max_bucket * - * Find REAL maximum bucket - ile(1.0) isnt necessarily correct + * Find REAL maximum bucket - ile(1.0) isn't necessarily correct **********************************************************************/ inT32 STATS::max_bucket() const { // Find 
max @@ -249,7 +249,7 @@ double STATS::median() const { //get median if ((total_count_ > 1) && (pile_count(median_pile) == 0)) { inT32 min_pile; inT32 max_pile; - /* Find preceeding non zero pile */ + /* Find preceding non zero pile */ for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--); /* Find following non zero pile */ for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++); diff --git a/ccstruct/vecfuncs.cpp b/ccstruct/vecfuncs.cpp index 8357c9aabe..bafca55d60 100644 --- a/ccstruct/vecfuncs.cpp +++ b/ccstruct/vecfuncs.cpp @@ -23,7 +23,7 @@ * ******************************************************************************** * Revision 5.1 89/07/27 11:47:50 11:47:50 ray () - * Added ratings acces methods. + * Added ratings access methods. * This version ready for independent development. */ /*---------------------------------------------------------------------- diff --git a/ccutil/clst.cpp b/ccutil/clst.cpp index 60f88d3706..e71cc20100 100644 --- a/ccutil/clst.cpp +++ b/ccutil/clst.cpp @@ -190,7 +190,7 @@ const void *, const void *)) { // Assuming list has been sorted already, insert new_data to // keep the list sorted according to the same comparison function. -// Comparision function is the same as used by sort, i.e. uses double +// Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique, then don't add duplicate entries. 
@@ -513,7 +513,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //cant find end pt + if (temp_it.cycled_list ()) //can't find end pt BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/clst.h b/ccutil/clst.h index 89c4369949..a209ac11cc 100644 --- a/ccutil/clst.h +++ b/ccutil/clst.h @@ -51,11 +51,11 @@ class DLLSYM CLIST_LINK } CLIST_LINK( //copy constructor - const CLIST_LINK &) { //dont copy link + const CLIST_LINK &) { //don't copy link data = next = NULL; } - void operator= ( //dont copy links + void operator= ( //don't copy links const CLIST_LINK &) { data = next = NULL; } @@ -89,7 +89,7 @@ class DLLSYM CLIST void internal_deep_clear ( //destroy all links void (*zapper) (void *)); //ptr to zapper functn - void shallow_clear(); //clear list but dont + void shallow_clear(); //clear list but don't //delete data elements bool empty() const { //is list empty? @@ -117,7 +117,7 @@ class DLLSYM CLIST // Assuming list has been sorted already, insert new_data to // keep the list sorted according to the same comparison function. - // Comparision function is the same as used by sort, i.e. uses double + // Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique, then don't add duplicate entries. @@ -232,7 +232,7 @@ class DLLSYM CLIST_ITERATOR BOOL8 cycled_list(); //Completed a cycle? 
void add_to_end( //add at end & - void *new_data); //dont move + void *new_data); //don't move void exchange( //positions of 2 links CLIST_ITERATOR *other_it); //other iterator @@ -437,7 +437,7 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add /*********************************************************************** * CLIST_ITERATOR::add_before_stay_put * - * Add a new element to the list before the current element but dont move the + * Add a new element to the list before the current element but don't move the * iterator to the new element. **********************************************************************/ @@ -485,7 +485,7 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add /*********************************************************************** * CLIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but dont move the + * Insert another list to this list after the current element but don't move the * iterator. **********************************************************************/ @@ -836,7 +836,7 @@ Replace with "". may be an arbitrary number of tokens CLASSNAME is assumed to be the name of a class to be used in a CONS list -NOTE: Because we dont use virtual functions in the list code, the list code +NOTE: Because we don't use virtual functions in the list code, the list code will NOT work correctly for classes derived from this. The macro generates: @@ -885,7 +885,7 @@ public: \ CLASSNAME##_CLIST():CLIST() {} \ /* constructor */ \ \ - CLASSNAME##_CLIST( /* dont construct */ \ + CLASSNAME##_CLIST( /* don't construct */ \ const CLASSNAME##_CLIST&) /*by initial assign*/ \ { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_CLIST ), \ ABORT, NULL ); } \ @@ -963,7 +963,7 @@ CLISTIZEH_C( CLASSNAME ) * A function which can delete a CLASSNAME element. 
This is passed to the \ * generic deep_clear list member function so that when a list is cleared the \ * elements on the list are properly destroyed from the base class, even \ -* though we dont use a virtual destructor function. \ +* though we don't use a virtual destructor function. \ **********************************************************************/ \ \ DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ diff --git a/ccutil/elst.cpp b/ccutil/elst.cpp index 7762220d6e..67a8ab0cbe 100644 --- a/ccutil/elst.cpp +++ b/ccutil/elst.cpp @@ -117,7 +117,7 @@ inT32 ELIST::length() const { // count elements * ELIST::sort * * Sort elements on list - * NB If you dont like the const declarations in the comparator, coerce yours: + * NB If you don't like the const declarations in the comparator, coerce yours: * ( int (*)(const void *, const void *) **********************************************************************/ @@ -161,7 +161,7 @@ const void *, const void *)) { // Assuming list has been sorted already, insert new_link to // keep the list sorted according to the same comparison function. -// Comparision function is the same as used by sort, i.e. uses double +// Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique is set to true and comparator() returns 0 (an entry with the @@ -455,7 +455,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //cant find end pt + if (temp_it.cycled_list ()) //can't find end pt BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst.h b/ccutil/elst.h index dcc1552d16..492c03acb3 100644 --- a/ccutil/elst.h +++ b/ccutil/elst.h @@ -67,7 +67,7 @@ The implementation of lists is very careful about space and speed overheads. 
This is why many embedded lists are provided. The same concerns mean that in-line type coercion is done, rather than use virtual functions. This is cumbersome in that each data type to be listed requires its own iterator and -list class - though macros can gererate these. It also prevents heterogenous +list class - though macros can generate these. It also prevents heterogeneous lists. **********************************************************************/ @@ -98,7 +98,7 @@ class DLLSYM ELIST_LINK next = NULL; } - void operator= ( //dont copy links + void operator= ( //don't copy links const ELIST_LINK &) { next = NULL; } @@ -158,7 +158,7 @@ class DLLSYM ELIST // Assuming list has been sorted already, insert new_link to // keep the list sorted according to the same comparison function. - // Comparision function is the same as used by sort, i.e. uses double + // Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. // If unique is set to true and comparator() returns 0 (an entry with the @@ -274,7 +274,7 @@ class DLLSYM ELIST_ITERATOR bool cycled_list(); //Completed a cycle? void add_to_end( //add at end & - ELIST_LINK *new_link); //dont move + ELIST_LINK *new_link); //don't move void exchange( //positions of 2 links ELIST_ITERATOR *other_it); //other iterator @@ -470,7 +470,7 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add /*********************************************************************** * ELIST_ITERATOR::add_before_stay_put * - * Add a new element to the list before the current element but dont move the + * Add a new element to the list before the current element but don't move the * iterator to the new element.
**********************************************************************/ @@ -515,7 +515,7 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add /*********************************************************************** * ELIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but dont move the + * Insert another list to this list after the current element but don't move the * iterator. **********************************************************************/ @@ -868,7 +868,7 @@ Replace with "". may be an arbitrary number of tokens CLASSNAME is assumed to be the name of a class which has a baseclass of ELIST_LINK. -NOTE: Because we dont use virtual functions in the list code, the list code +NOTE: Because we don't use virtual functions in the list code, the list code will NOT work correctly for classes derived from this. The macros generate: @@ -999,7 +999,7 @@ ELISTIZEH_C( CLASSNAME ) * A function which can delete a CLASSNAME element. This is passed to the \ * generic clear list member function so that when a list is cleared the \ * elements on the list are properly destroyed from the base class, even \ -* though we dont use a virtual destructor function. \ +* though we don't use a virtual destructor function. 
\ **********************************************************************/ \ \ DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) { \ diff --git a/ccutil/elst2.cpp b/ccutil/elst2.cpp index 7055686fb5..fe5b77e256 100644 --- a/ccutil/elst2.cpp +++ b/ccutil/elst2.cpp @@ -118,7 +118,7 @@ inT32 ELIST2::length() const { // count elements * ELIST2::sort * * Sort elements on list - * NB If you dont like the const declarations in the comparator, coerce yours: + * NB If you don't like the const declarations in the comparator, coerce yours: * ( int (*)(const void *, const void *) **********************************************************************/ @@ -162,7 +162,7 @@ const void *, const void *)) { // Assuming list has been sorted already, insert new_link to // keep the list sorted according to the same comparison function. -// Comparision function is the same as used by sort, i.e. uses double +// Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. 
void ELIST2::add_sorted(int comparator(const void*, const void*), @@ -475,7 +475,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( //fr temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //cant find end pt + if (temp_it.cycled_list ()) //can't find end pt BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst2.h b/ccutil/elst2.h index 7201750dcb..f7ea6ed07c 100644 --- a/ccutil/elst2.h +++ b/ccutil/elst2.h @@ -69,11 +69,11 @@ class DLLSYM ELIST2_LINK } ELIST2_LINK( //copy constructor - const ELIST2_LINK &) { //dont copy link + const ELIST2_LINK &) { //don't copy link prev = next = NULL; } - void operator= ( //dont copy links + void operator= ( //don't copy links const ELIST2_LINK &) { prev = next = NULL; } @@ -133,7 +133,7 @@ class DLLSYM ELIST2 // Assuming list has been sorted already, insert new_link to // keep the list sorted according to the same comparison function. - // Comparision function is the same as used by sort, i.e. uses double + // Comparison function is the same as used by sort, i.e. uses double // indirection. Time is O(1) to add to beginning or end. // Time is linear to add pre-sorted items to an empty list. void add_sorted(int comparator(const void*, const void*), @@ -241,7 +241,7 @@ class DLLSYM ELIST2_ITERATOR BOOL8 cycled_list(); //Completed a cycle? void add_to_end( //add at end & - ELIST2_LINK *new_link); //dont move + ELIST2_LINK *new_link); //don't move void exchange( //positions of 2 links ELIST2_ITERATOR *other_it); //other iterator @@ -450,7 +450,7 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add /*********************************************************************** * ELIST2_ITERATOR::add_before_stay_put * - * Add a new element to the list before the current element but dont move the + * Add a new element to the list before the current element but don't move the * iterator to the new element. 
**********************************************************************/ @@ -500,7 +500,7 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add /*********************************************************************** * ELIST2_ITERATOR::add_list_after * - * Insert another list to this list after the current element but dont move the + * Insert another list to this list after the current element but don't move the * iterator. **********************************************************************/ @@ -883,7 +883,7 @@ Replace with "". may be an arbitrary number of tokens CLASSNAME is assumed to be the name of a class which has a baseclass of ELIST2_LINK. -NOTE: Because we dont use virtual functions in the list code, the list code +NOTE: Because we don't use virtual functions in the list code, the list code will NOT work correctly for classes derived from this. The macro generates: @@ -927,7 +927,7 @@ public: \ CLASSNAME##_LIST():ELIST2() {} \ /* constructor */ \ \ - CLASSNAME##_LIST( /* dont construct */ \ + CLASSNAME##_LIST( /* don't construct */ \ const CLASSNAME##_LIST&) /*by initial assign*/\ { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_LIST ), \ ABORT, NULL ); } \ @@ -1015,7 +1015,7 @@ ELIST2IZEH_C( CLASSNAME ) * A function which can delete a CLASSNAME element. This is passed to the \ * generic clear list member function so that when a list is cleared the \ * elements on the list are properly destroyed from the base class, even \ -* though we dont use a virtual destructor function. \ +* though we don't use a virtual destructor function. 
\ **********************************************************************/ \ \ DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ diff --git a/ccutil/errcode.h b/ccutil/errcode.h index 89385d2b93..69d4187a37 100644 --- a/ccutil/errcode.h +++ b/ccutil/errcode.h @@ -53,7 +53,7 @@ enum TessErrorLogCode { #define LOC_DOC_BLK_REJ 22 #define LOC_WRITE_RESULTS 23 #define LOC_ADAPTIVE 24 -/* DONT DEFINE ANY LOCATION > 31 !!! */ +/* DON'T DEFINE ANY LOCATION > 31 !!! */ /* Sub locatation determines whether pass2 was in normal mode or fix xht mode*/ #define SUBLOC_NORM 0 diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index 8433966bf9..a0ca9e2926 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -949,7 +949,7 @@ bool GenericVector::SerializeClasses(tesseract::TFile* fp) const { // Reads a vector of classes from the given file. Assumes the existence of // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of -// error. Alse needs T::T() and T::T(constT&), as init_to_size is used in +// error. Also needs T::T() and T::T(constT&), as init_to_size is used in // this function. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. template diff --git a/ccutil/helpers.h b/ccutil/helpers.h index 480929c955..022a2c3066 100644 --- a/ccutil/helpers.h +++ b/ccutil/helpers.h @@ -61,8 +61,8 @@ class TRand { private: // Steps the generator to the next value. void Iterate() { - seed_ *= 6364136223846793005; - seed_ += 1442695040888963407; + seed_ *= 6364136223846793005ULL; + seed_ += 1442695040888963407ULL; } // The current value of the seed. 
diff --git a/ccutil/lsterr.h b/ccutil/lsterr.h index 6bcd7fead1..42ed07e326 100644 --- a/ccutil/lsterr.h +++ b/ccutil/lsterr.h @@ -38,6 +38,6 @@ const ERRCODE NULL_PREV = "Previous element on the list is NULL"; const ERRCODE EMPTY_LIST = "List is empty"; const ERRCODE BAD_PARAMETER = "List parameter error"; const ERRCODE STILL_LINKED = -"Attemting to add an element with non NULL links, to a list"; +"Attempting to add an element with non NULL links, to a list"; #endif #endif diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h index 37556b30b2..9be184d591 100644 --- a/ccutil/ocrclass.h +++ b/ccutil/ocrclass.h @@ -21,7 +21,7 @@ * the HP OCR interface. * The code is designed to be used with either a C or C++ compiler. * The structures are designed to allow them to be used with any - * structure alignment upto 8. + * structure alignment up to 8. **********************************************************************/ #ifndef CCUTIL_OCRCLASS_H_ diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index 1c9769978a..b44c541246 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -45,7 +45,7 @@ const int kMaxDoubleSize = 15; * * The collection of MACROS provide different implementations depending * on whether the string keeps track of its strlen or not so that this - * feature can be added in later when consumers dont modifify the string + * feature can be added in later when consumers don't modify the string **********************************************************************/ // Smallest string to allocate by default @@ -339,7 +339,7 @@ STRING& STRING::operator=(const STRING& str) { const STRING_HEADER* str_header = str.GetHeader(); int str_used = str_header->used_; - GetHeader()->used_ = 0; // clear since ensure doesnt need to copy data + GetHeader()->used_ = 0; // clear since ensure doesn't need to copy data char* this_cstr = ensure_cstr(str_used); STRING_HEADER* this_header = GetHeader(); @@ -398,7 +398,7 @@ STRING & STRING::operator=(const char* cstr) { if (cstr) { 
int len = strlen(cstr) + 1; - this_header->used_ = 0; // dont bother copying data if need to realloc + this_header->used_ = 0; // don't bother copying data if need to realloc char* this_cstr = ensure_cstr(len); this_header = GetHeader(); // for realloc memcpy(this_cstr, cstr, len); @@ -416,7 +416,7 @@ STRING & STRING::operator=(const char* cstr) { void STRING::assign(const char *cstr, int len) { STRING_HEADER* this_header = GetHeader(); - this_header->used_ = 0; // dont bother copying data if need to realloc + this_header->used_ = 0; // don't bother copying data if need to realloc char* this_cstr = ensure_cstr(len + 1); // +1 for '\0' this_header = GetHeader(); // for realloc diff --git a/ccutil/tessdatamanager.cpp b/ccutil/tessdatamanager.cpp index 032d5fee61..23d029bb42 100644 --- a/ccutil/tessdatamanager.cpp +++ b/ccutil/tessdatamanager.cpp @@ -51,7 +51,7 @@ bool TessdataManager::Init(const char *data_file_name, int debug_level) { sizeof(actual_tessdata_num_entries_)); } if (actual_tessdata_num_entries_ > TESSDATA_NUM_ENTRIES) { - // For forward compatability, truncate to the number we can handle. + // For forward compatibility, truncate to the number we can handle. actual_tessdata_num_entries_ = TESSDATA_NUM_ENTRIES; } fread(offset_table_, sizeof(inT64), diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h index de3e599025..fd2685a1d8 100644 --- a/ccutil/tessdatamanager.h +++ b/ccutil/tessdatamanager.h @@ -282,7 +282,7 @@ class TessdataManager { * same or smaller than TESSDATA_NUM_ENTRIES, but can never be larger, * since then it would be impossible to interpret the type of tessdata at * indices same and higher than TESSDATA_NUM_ENTRIES. - * This parameter is used to allow for backward compatiblity + * This parameter is used to allow for backward compatibility * when new tessdata types are introduced. 
*/ inT32 actual_tessdata_num_entries_; diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 3a6ef1c498..b89f1cb7ae 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -515,7 +515,7 @@ void Classify::EndAdaptiveClassifier() { * load_pre_trained_templates Indicates whether the pre-trained * templates (inttemp, normproto and pffmtable components) * should be lodaded. Should only be set to true if the - * necesary classifier components are present in the + * necessary classifier components are present in the * [lang].traineddata file. * Globals: * BuiltInTemplatesFile file to get built-in temps from @@ -1720,7 +1720,7 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) { * * Globals: * - * @return Number of features extracted or 0 if an error occured. + * @return Number of features extracted or 0 if an error occurred. * @note Exceptions: none * @note History: Tue May 28 10:40:52 1991, DSJ, Created. */ @@ -2082,7 +2082,7 @@ void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) { /*---------------------------------------------------------------------------*/ /** - * This routine steps thru each matching class in Results + * This routine steps through each matching class in Results * and removes it from the match list if its rating * is worse than the BestRating plus a pad. 
In other words, * all good matches get moved to the front of the classes diff --git a/classify/classify.cpp b/classify/classify.cpp index c68fc27643..436efd1f2d 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -151,7 +151,7 @@ Classify::Classify() INT_MEMBER(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ", this->params()), EnableLearning(true), - INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word", + INT_MEMBER(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word", this->params()), BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].", this->params()), diff --git a/classify/classify.h b/classify/classify.h index e952394630..0de8441527 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -495,7 +495,7 @@ class Classify : public CCStruct { // font combinations that the shape represents. UnicityTable fontset_table_; - INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word"); + INT_VAR_H(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word"); BOOL_VAR_H(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9]."); double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size"); diff --git a/classify/cluster.cpp b/classify/cluster.cpp index ef46f77c21..b723bfa82e 100644 --- a/classify/cluster.cpp +++ b/classify/cluster.cpp @@ -182,7 +182,7 @@ struct BUCKETS { FLOAT64 ChiSquared; // test threshold uinT16 NumberOfBuckets; // number of cells in histogram uinT16 Bucket[BUCKETTABLESIZE];// mapping to histogram buckets - uinT32 *Count; // frequency of occurence histogram + uinT32 *Count; // frequency of occurrence histogram FLOAT32 *ExpectedCount; // expected histogram }; diff --git a/classify/clusttool.h b/classify/clusttool.h index a4f3b8351d..e82fa1ef48 100644 --- a/classify/clusttool.h +++ b/classify/clusttool.h @@ -24,7 +24,7 @@ #include /*------------------------------------------------------------------------- - Public 
Funtion Prototype + Public Function Prototype --------------------------------------------------------------------------*/ uinT16 ReadSampleSize(FILE *File); diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index cf9e551509..ad7b799675 100644 --- a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -285,7 +285,7 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, /*---------------------------------------------------------------------------*/ /** - * Search thru all features currently defined and return + * Search through all features currently defined and return * the feature type for the feature with the specified short * name. Trap an error if the specified name is not found. * diff --git a/classify/intfx.cpp b/classify/intfx.cpp index 12966aa195..78aa59bbc9 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -44,7 +44,7 @@ using tesseract::TrainingSample; // The entries are in binary degrees where a full circle is 256 binary degrees. static float cos_table[INT_CHAR_NORM_RANGE]; static float sin_table[INT_CHAR_NORM_RANGE]; -// Guards write access to AtanTable so we dont create it more than once. +// Guards write access to AtanTable so we don't create it more than once. tesseract::CCUtilMutex atan_table_mutex; diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp index 8d05149cc1..61a94f66cc 100644 --- a/classify/kdtree.cpp +++ b/classify/kdtree.cpp @@ -521,7 +521,7 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { * Walk a tree, calling action once on each node. 
* * Operation: - * This routine walks thru the specified sub_tree and invokes action + * This routine walks through the specified sub_tree and invokes action * action at each node as follows: * action(context, data, level) * data the data contents of the node being visited, diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index 7f1b04ad44..511c34d41f 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -104,7 +104,7 @@ LIST ConvertOutlines(TESSLINE *outline, /*---------------------------------------------------------------------------*/ /** - * This routine searches thru the specified outline, computes + * This routine searches through the specified outline, computes * a slope for each vector in the outline, and marks each * vector as having one of the following directions: * N, S, E, W, NE, NW, SE, SW @@ -182,7 +182,7 @@ void FreeOutlines(LIST Outlines) { /*---------------------------------------------------------------------------*/ /** - * This routine searches thru the specified outline and finds + * This routine searches through the specified outline and finds * the points at which the outline changes direction. These * points are then marked as "extremities". This routine is * used as an alternative to FindExtremities(). It forces the diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index fea3b14121..74beb18f35 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -147,7 +147,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, /*---------------------------------------------------------------------------*/ /** - * This routine steps thru the specified outline and cuts it + * This routine steps through the specified outline and cuts it * up into pieces of equal length. These pieces become the * desired pico-features. Each segment in the outline * is converted into an integral number of pico-features. 
diff --git a/cube/beam_search.cpp b/cube/beam_search.cpp index a89b15d8a9..fd17a1d59f 100644 --- a/cube/beam_search.cpp +++ b/cube/beam_search.cpp @@ -93,7 +93,7 @@ void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod, } // lm_edges } -// Performs a beam seach in the specified search using the specified +// Performs a beam search in the specified search using the specified // language model; returns an alternate list of possible words as a result. WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) { // verifications diff --git a/cube/beam_search.h b/cube/beam_search.h index a39f5b1349..cd8fc0110d 100644 --- a/cube/beam_search.h +++ b/cube/beam_search.h @@ -45,7 +45,7 @@ class BeamSearch { public: explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true); ~BeamSearch(); - // Performs a beam seach in the specified search using the specified + // Performs a beam search in the specified search using the specified // language model; returns an alternate list of possible words as a result. WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL); // Returns the best node in the last column of last performed search. diff --git a/cube/conv_net_classifier.cpp b/cube/conv_net_classifier.cpp index d6ae692e7b..ac33cd33b1 100644 --- a/cube/conv_net_classifier.cpp +++ b/cube/conv_net_classifier.cpp @@ -72,7 +72,7 @@ bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) { /** * A secondary function needed for training. Allows the trainer to set the - * value of any train-time paramter. This function is currently not + * value of any train-time parameter. This function is currently not * implemented. 
TODO(ahmadab): implement end-2-end training */ bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) { diff --git a/cube/conv_net_classifier.h b/cube/conv_net_classifier.h index e9bcd8c2cc..b9e7692c28 100644 --- a/cube/conv_net_classifier.h +++ b/cube/conv_net_classifier.h @@ -55,7 +55,7 @@ class ConvNetCharClassifier : public CharClassifier { // is currently not implemented. TODO(ahmadab): implement end-2-end training virtual bool Train(CharSamp *char_samp, int ClassID); // A secondary function needed for training. Allows the trainer to set the - // value of any train-time paramter. This function is currently not + // value of any train-time parameter. This function is currently not // implemented. TODO(ahmadab): implement end-2-end training virtual bool SetLearnParam(char *var_name, float val); // Externally sets the Neural Net used by the classifier. Used for training diff --git a/cube/cube_line_object.cpp b/cube/cube_line_object.cpp index 64b90cadff..0325453740 100644 --- a/cube/cube_line_object.cpp +++ b/cube/cube_line_object.cpp @@ -247,7 +247,7 @@ int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt, word_break_threshold--; } while (!valid && word_break_threshold > 0); - // failed to find a threshold that acheives the target aspect ratio. + // failed to find a threshold that achieves the target aspect ratio. 
// Just use the default threshold return static_cast(line_pix_->h * cntxt_->Params()->MaxSpaceHeightRatio()); diff --git a/cube/cube_line_segmenter.cpp b/cube/cube_line_segmenter.cpp index 82f8c8ede4..278011f090 100644 --- a/cube/cube_line_segmenter.cpp +++ b/cube/cube_line_segmenter.cpp @@ -237,7 +237,7 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, return NULL; } -// split a line continously until valid or fail +// split a line continuously until valid or fail Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) { // clone the line mask Pix *line_pix = pixClone(line_mask_pix); @@ -739,7 +739,7 @@ bool CubeLineSegmenter::LineSegment() { return true; } -// Estimate the paramters of the font(s) used in the page +// Estimate the parameters of the font(s) used in the page bool CubeLineSegmenter::EstimateFontParams() { int hgt_hist[kHgtBins]; int max_hgt; diff --git a/cube/cube_search_object.cpp b/cube/cube_search_object.cpp index 0cf54e31a9..61294f26b6 100644 --- a/cube/cube_search_object.cpp +++ b/cube/cube_search_object.cpp @@ -212,7 +212,7 @@ CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) { samp->SetLastChar(last_char ? 255 : 0); } else { // for non cursive languages, these features correspond - // to whether the charsamp is at the begining or end of the word + // to whether the charsamp is at the beginning or end of the word samp->SetFirstChar((start_pt == -1) ? 255 : 0); samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 
255 : 0); } diff --git a/cube/cube_search_object.h b/cube/cube_search_object.h index 8452417a69..0a6c3ce20b 100644 --- a/cube/cube_search_object.h +++ b/cube/cube_search_object.h @@ -114,7 +114,7 @@ class CubeSearchObject : public SearchObject { end_pt <= (start_pt + max_seg_per_char_)); } // computes the space and no space costs at gaps between segments - // return true on sucess + // return true on success bool ComputeSpaceCosts(); }; } diff --git a/cube/hybrid_neural_net_classifier.cpp b/cube/hybrid_neural_net_classifier.cpp index b5822f6f22..671a74acdf 100644 --- a/cube/hybrid_neural_net_classifier.cpp +++ b/cube/hybrid_neural_net_classifier.cpp @@ -72,7 +72,7 @@ bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) { } // A secondary function needed for training. Allows the trainer to set the -// value of any train-time paramter. This function is currently not +// value of any train-time parameter. This function is currently not // implemented. TODO(ahmadab): implement end-2-end training bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) { // TODO(ahmadab): implementation of parameter initializing. @@ -151,7 +151,7 @@ bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) { return false; } - // go thru all the nets + // go through all the nets memset(net_output_, 0, class_cnt * sizeof(*net_output_)); float *inputs = net_input_; for (int net_idx = 0; net_idx < nets_.size(); net_idx++) { diff --git a/cube/hybrid_neural_net_classifier.h b/cube/hybrid_neural_net_classifier.h index 0ab9ba1235..6ad6233f43 100644 --- a/cube/hybrid_neural_net_classifier.h +++ b/cube/hybrid_neural_net_classifier.h @@ -48,7 +48,7 @@ class HybridNeuralNetCharClassifier : public CharClassifier { // is currently not implemented. TODO(ahmadab): implement end-2-end training virtual bool Train(CharSamp *char_samp, int ClassID); // A secondary function needed for training. 
Allows the trainer to set the - // value of any train-time paramter. This function is currently not + // value of any train-time parameter. This function is currently not // implemented. TODO(ahmadab): implement end-2-end training virtual bool SetLearnParam(char *var_name, float val); // Externally sets the Neural Net used by the classifier. Used for training diff --git a/cube/tess_lang_model.cpp b/cube/tess_lang_model.cpp index 8b4ff68ee4..5113207260 100644 --- a/cube/tess_lang_model.cpp +++ b/cube/tess_lang_model.cpp @@ -397,7 +397,7 @@ int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) { return 0; } - // go thru all valid transitions from the state + // go through all valid transitions from the state int edge_cnt = 0; EDGE_REF new_edge_ref; diff --git a/cutil/listio.h b/cutil/listio.h index e758c9bcb2..7d9c19f777 100644 --- a/cutil/listio.h +++ b/cutil/listio.h @@ -37,7 +37,7 @@ #include "oldlist.h" /*---------------------------------------------------------------------------- - Public Funtion Prototypes + Public Function Prototypes --------------------------------------------------------------------------*/ LIST read_list(const char *filename); #endif diff --git a/cutil/oldlist.cpp b/cutil/oldlist.cpp index cf93ffb518..52c0d8680a 100644 --- a/cutil/oldlist.cpp +++ b/cutil/oldlist.cpp @@ -407,7 +407,7 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { * * Search list, return NIL_LIST if not found. Return the list starting from * the item if found. The compare routine "is_equal" is passed in as - * the third paramter to this routine. If the value NULL is supplied + * the third parameter to this routine. If the value NULL is supplied * for is_equal, the is_key routine will be used. 
**********************************************************************/ LIST search(LIST list, void *key, int_compare is_equal) { diff --git a/cutil/oldlist.h b/cutil/oldlist.h index 103dd72592..a0130ae061 100644 --- a/cutil/oldlist.h +++ b/cutil/oldlist.h @@ -234,7 +234,7 @@ first_node (list_rest (l)) first_node (list_rest (list_rest (l))) /*---------------------------------------------------------------------- - Public Funtion Prototypes + Public Function Prototypes ----------------------------------------------------------------------*/ int count(LIST var_list); diff --git a/dict/context.cpp b/dict/context.cpp index 206447d98f..a9acb137c3 100644 --- a/dict/context.cpp +++ b/dict/context.cpp @@ -33,7 +33,7 @@ static const int kMinAbsoluteGarbageWordLength = 10; static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f; const int case_state_table[6][4] = { { - /* 0. Begining of word */ + /* 0. Beginning of word */ /* P U L D */ /* -1. Error on case */ 0, 1, 5, 4 diff --git a/dict/dawg.h b/dict/dawg.h index a487d3fd1c..b37e771503 100644 --- a/dict/dawg.h +++ b/dict/dawg.h @@ -447,7 +447,7 @@ class SquishedDawg : public Dawg { EDGE_REF edge = node; if (!edge_occupied(edge) || edge == NO_EDGE) return; assert(forward_edge(edge)); // we don't expect any backward edges to - do { // be present when this funciton is called + do { // be present when this function is called if (!word_end || end_of_word_from_edge_rec(edges_[edge])) { vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge)); } diff --git a/dict/dict.cpp b/dict/dict.cpp index 8df5b63bb4..e59b00d58b 100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -127,7 +127,7 @@ Dict::Dict(CCUtil* ccutil) " when there is a need to explore all segmentations", getCCUtil()->params()), BOOL_MEMBER(save_raw_choices, false, - "Deprecated- backward compatablity only", + "Deprecated- backward compatibility only", getCCUtil()->params()), INT_MEMBER(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list", diff 
--git a/dict/dict.h b/dict/dict.h index 7556bc5460..938ca3a332 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -614,7 +614,7 @@ class Dict { "Make AcceptableChoice() always return false. Useful" " when there is a need to explore all segmentations"); BOOL_VAR_H(save_raw_choices, false, - "Deprecated- backward compatability only"); + "Deprecated- backward compatibility only"); INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information" " should be printed to stdout"); diff --git a/dict/permdawg.cpp b/dict/permdawg.cpp index 7d60d395ff..71e2deca43 100644 --- a/dict/permdawg.cpp +++ b/dict/permdawg.cpp @@ -303,7 +303,7 @@ void Dict::append_choices( * * The given prev_char_frag_info contains: * - fragment: if not NULL contains information about immediately - * preceeding fragmented character choice + * preceding fragmented character choice * - num_fragments: number of fragments that have been used so far * to construct a character * - certainty: certainty of the current choice or minimum diff --git a/doc/Doxyfile b/doc/Doxyfile index 673defaf10..c4f496be39 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -1657,7 +1657,7 @@ EXTRA_PACKAGES = # following commands have a special meaning inside the header: $title, # $datetime, $date, $doxygenversion, $projectname, $projectnumber, # $projectbrief, $projectlogo. Doxygen will replace $title with the empy string, -# for the replacement values of the other commands the user is refered to +# for the replacement values of the other commands the user is referred to # HTML_HEADER. # This tag requires that the tag GENERATE_LATEX is set to YES. 
diff --git a/java/Makefile.am b/java/Makefile.am index 3ed962dfcc..fddbc6f9ec 100644 --- a/java/Makefile.am +++ b/java/Makefile.am @@ -42,18 +42,22 @@ SCROLLVIEW_LIBS = \ CLASSPATH = $(srcdir)/piccolo2d-core-3.0.jar:$(srcdir)/piccolo2d-extras-3.0.jar ScrollView.jar : $(SCROLLVIEW_CLASSES) - $(JAR) cf $@ com/google/scrollview/*.class \ + $(JAR) cfm $@ Manifest.txt com/google/scrollview/*.class \ com/google/scrollview/events/*.class com/google/scrollview/ui/*.class $(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) $(JAVAC) -encoding UTF8 -sourcepath $(srcdir) -classpath $(CLASSPATH) $(SCROLLVIEW_FILES) -d $(builddir) +fetch-jars : + curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar > piccolo2d-core-3.0.jar + curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar > piccolo2d-extras-3.0.jar + .PHONY: install-jars install-jars : ScrollView.jar @if [ ! -d $(scrollview_path) ]; then mkdir -p $(scrollview_path); fi; $(INSTALL) -m 644 $(SCROLLVIEW_LIBS) $(scrollview_path); $(INSTALL) -m 644 ScrollView.jar $(scrollview_path); - @echo "Don't forget to set eviroment variable SCROLLVIEW_PATH to $(scrollview_path)"; + @echo "Don't forget to set environment variable SCROLLVIEW_PATH to $(scrollview_path)"; uninstall: rm -f $(scrollview_path)/*.jar diff --git a/java/Manifest.txt b/java/Manifest.txt new file mode 100644 index 0000000000..bc0b707bd8 --- /dev/null +++ b/java/Manifest.txt @@ -0,0 +1,2 @@ +Main-Class: com/google/scrollview/ScrollView +Class-Path: ScrollView.jar piccolo2d-core-3.0.jar piccolo2d-extras-3.0.jar diff --git a/java/com/google/scrollview/ui/SVMenuBar.java b/java/com/google/scrollview/ui/SVMenuBar.java index 7c2f5d9af8..9a87524ef0 100644 --- a/java/com/google/scrollview/ui/SVMenuBar.java +++ b/java/com/google/scrollview/ui/SVMenuBar.java @@ -50,7 +50,7 @@ public SVMenuBar(SVWindow scrollView) { /** - * A click on one of the items in our menubar has 
occured. Forward it + * A click on one of the items in our menubar has occurred. Forward it * to the item itself to let it decide what happens. */ public void actionPerformed(ActionEvent e) { @@ -111,7 +111,7 @@ else if (id == -1) { * @param name The caption of the new entry. * @param id The Id of the new entry. If it is -1, the entry will be treated * as a menu. - * @param b Whether the entry is initally flagged. + * @param b Whether the entry is initially flagged. * */ diff --git a/java/com/google/scrollview/ui/SVPopupMenu.java b/java/com/google/scrollview/ui/SVPopupMenu.java index 6427c0ef85..14c8b3acd3 100644 --- a/java/com/google/scrollview/ui/SVPopupMenu.java +++ b/java/com/google/scrollview/ui/SVPopupMenu.java @@ -123,7 +123,7 @@ public void add(String parent, String name, int id, String value, String desc) { /** - * A click on one of the items in our menubar has occured. Forward it + * A click on one of the items in our menubar has occurred. Forward it * to the item itself to let it decide what happens. */ public void actionPerformed(ActionEvent e) { diff --git a/java/com/google/scrollview/ui/SVWindow.java b/java/com/google/scrollview/ui/SVWindow.java index f4960276f6..267bfdda03 100644 --- a/java/com/google/scrollview/ui/SVWindow.java +++ b/java/com/google/scrollview/ui/SVWindow.java @@ -298,7 +298,7 @@ public void addMessageBox() { ta.setEditable(false); getContentPane().add(ta, BorderLayout.SOUTH); } - // We need to make the window bigger to accomodate the message box. + // We need to make the window bigger to accommodate the message box. 
winSizeY += DEF_MESSAGEBOX_HEIGHT; setSize(winSizeX, winSizeY); } diff --git a/training/language-specific.sh b/training/language-specific.sh index bc64f67c88..23dee3e1cd 100755 --- a/training/language-specific.sh +++ b/training/language-specific.sh @@ -780,7 +780,7 @@ VERTICAL_FONTS=( \ # holds the text corpus file for the language, used in phase F # ${FONTS[@]} # holds a sequence of applicable fonts for the language, used in -# phase F & I +# phase F & I. only set if not already set, i.e. from command line # ${TRAINING_DATA_ARGUMENTS} # non-default arguments to the training_data program used in phase T # ${FILTER_ARGUMENTS} - @@ -794,7 +794,6 @@ set_lang_specific_parameters() { local lang=$1 # The default text location is now given directly from the language code. TEXT_CORPUS="${FLAGS_webtext_prefix}/${lang}.corpus.txt" - FONTS=( "${LATIN_FONTS[@]}" ) FILTER_ARGUMENTS="" WORDLIST2DAWG_ARGUMENTS="" # These dawg factors represent the fraction of the corpus not covered by the @@ -816,30 +815,30 @@ set_lang_specific_parameters() { case ${lang} in # Latin languages. enm ) TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported - FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; frm ) TEXT_CORPUS="${FLAGS_webtext_prefix}/fra.corpus.txt" # Make long-s substitutions for Middle French text FILTER_ARGUMENTS="--make_early_language_variant=fra" TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. - FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; frk ) TEXT_CORPUS="${FLAGS_webtext_prefix}/deu.corpus.txt" - FONTS=( "${FRAKTUR_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${FRAKTUR_FONTS[@]}" );; ita_old ) TEXT_CORPUS="${FLAGS_webtext_prefix}/ita.corpus.txt" # Make long-s substitutions for Early Italian text FILTER_ARGUMENTS="--make_early_language_variant=ita" TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. 
- FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; spa_old ) TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt" # Make long-s substitutions for Early Spanish text FILTER_ARGUMENTS="--make_early_language_variant=spa" TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. - FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; srp_latn ) TEXT_CORPUS=${FLAGS_webtext_prefix}/srp.corpus.txt ;; vie ) TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - FONTS=( "${VIETNAMESE_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${VIETNAMESE_FONTS[@]}" ) ;; # Highly inflective languages get a bigger dawg size. # TODO(rays) Add more here! hun ) WORD_DAWG_SIZE=1000000 ;; @@ -899,14 +898,14 @@ set_lang_specific_parameters() { # Strip unrenderable words as not all fonts will render the extended # latin symbols found in Vietnamese text. WORD_DAWG_SIZE=1000000 - FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; # Cyrillic script-based languages. - rus ) FONTS=( "${RUSSIAN_FONTS[@]}" ) + rus ) test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) NUMBER_DAWG_FACTOR=0.05 WORD_DAWG_SIZE=1000000 ;; aze_cyrl | bel | bul | kaz | mkd | srp | tgk | ukr | uzb_cyrl ) - FONTS=( "${RUSSIAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) ;; # Special code for performing Cyrillic language-id that is trained on # Russian, Serbian, Ukranian, Belarusian, Macedonian, Tajik and Mongolian @@ -916,70 +915,70 @@ set_lang_specific_parameters() { TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" GENERATE_WORD_BIGRAMS=0 WORD_DAWG_SIZE=1000000 - FONTS=( "${RUSSIAN_FONTS[@]}" );; + test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" );; # South Asian scripts mostly have a lot of different graphemes, so trim # down the MEAN_COUNT so as not to get a huge amount of text. 
asm | ben ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 - FONTS=( "${BENGALI_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${BENGALI_FONTS[@]}" ) ;; bih | hin | mar | nep | san ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 - FONTS=( "${DEVANAGARI_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${DEVANAGARI_FONTS[@]}" ) ;; bod ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 - FONTS=( "${TIBETAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;; dzo ) WORD_DAWG_FACTOR=0.01 - FONTS=( "${TIBETAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;; guj ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 - FONTS=( "${GUJARATI_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${GUJARATI_FONTS[@]}" ) ;; kan ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - FONTS=( "${KANNADA_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${KANNADA_FONTS[@]}" ) ;; mal ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - FONTS=( "${MALAYALAM_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${MALAYALAM_FONTS[@]}" ) ;; ori ) WORD_DAWG_FACTOR=0.01 - FONTS=( "${ORIYA_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${ORIYA_FONTS[@]}" ) ;; pan ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.01 - FONTS=( "${PUNJABI_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${PUNJABI_FONTS[@]}" ) ;; sin ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.01 - FONTS=( "${SINHALA_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${SINHALA_FONTS[@]}" ) ;; tam ) MEAN_COUNT="30" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - FONTS=( "${TAMIL_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${TAMIL_FONTS[@]}" ) ;; tel ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output" TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5" - FONTS=( "${TELUGU_FONTS[@]}" ) ;; + test -z "$FONTS" 
&& FONTS=( "${TELUGU_FONTS[@]}" ) ;; # SouthEast Asian scripts. khm ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - FONTS=( "${KHMER_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${KHMER_FONTS[@]}" ) ;; lao ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - FONTS=( "${LAOTHIAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${LAOTHIAN_FONTS[@]}" ) ;; mya ) MEAN_COUNT="12" WORD_DAWG_FACTOR=0.15 TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" - FONTS=( "${BURMESE_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${BURMESE_FONTS[@]}" ) ;; tha ) MEAN_COUNT="30" WORD_DAWG_FACTOR=0.01 TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" @@ -987,7 +986,7 @@ set_lang_specific_parameters() { TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" AMBIGS_FILTER_DENOMINATOR="1000" LEADING=48 - FONTS=( "${THAI_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${THAI_FONTS[@]}" ) ;; # CJK chi_sim ) @@ -998,7 +997,7 @@ set_lang_specific_parameters() { TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" FILTER_ARGUMENTS="--charset_filter=chi_sim --segmenter_lang=chi_sim" - FONTS=( "${CHI_SIM_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${CHI_SIM_FONTS[@]}" ) ;; chi_tra ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.015 @@ -1006,14 +1005,14 @@ set_lang_specific_parameters() { TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" FILTER_ARGUMENTS="--charset_filter=chi_tra --segmenter_lang=chi_tra" - FONTS=( "${CHI_TRA_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${CHI_TRA_FONTS[@]}" ) ;; jpn ) MEAN_COUNT="15" WORD_DAWG_FACTOR=0.015 GENERATE_WORD_BIGRAMS=0 TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000" TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams=" FILTER_ARGUMENTS="--charset_filter=jpn --segmenter_lang=jpn" - FONTS=( 
"${JPN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${JPN_FONTS[@]}" ) ;; kor ) MEAN_COUNT="20" WORD_DAWG_FACTOR=0.015 NUMBER_DAWG_FACTOR=0.05 @@ -1021,38 +1020,38 @@ set_lang_specific_parameters() { TRAINING_DATA_ARGUMENTS+=" --desired_bigrams=" GENERATE_WORD_BIGRAMS=0 FILTER_ARGUMENTS="--charset_filter=kor --segmenter_lang=kor" - FONTS=( "${KOREAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${KOREAN_FONTS[@]}" ) ;; # Middle-Eastern scripts. - ara ) FONTS=( "${ARABIC_FONTS[@]}" ) ;; - div ) FONTS=( "${THAANA_FONTS[@]}" ) ;; + ara ) test -z "$FONTS" && FONTS=( "${ARABIC_FONTS[@]}" ) ;; + div ) test -z "$FONTS" && FONTS=( "${THAANA_FONTS[@]}" ) ;; fas | pus | snd | uig | urd ) - FONTS=( "${PERSIAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${PERSIAN_FONTS[@]}" ) ;; heb | yid ) NUMBER_DAWG_FACTOR=0.05 WORD_DAWG_FACTOR=0.08 - FONTS=( "${HEBREW_FONTS[@]}" ) ;; - syr ) FONTS=( "${SYRIAC_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${HEBREW_FONTS[@]}" ) ;; + syr ) test -z "$FONTS" && FONTS=( "${SYRIAC_FONTS[@]}" ) ;; # Other scripts. 
amh | tir) - FONTS=( "${AMHARIC_FONTS[@]}" ) ;; - chr ) FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" \ + test -z "$FONTS" && FONTS=( "${AMHARIC_FONTS[@]}" ) ;; + chr ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" \ "Noto Sans Cherokee" \ ) ;; ell | grc ) NUMBER_DAWG_FACTOR=0.05 WORD_DAWG_FACTOR=0.08 - FONTS=( "${GREEK_FONTS[@]}" ) ;; - hye ) FONTS=( "${ARMENIAN_FONTS[@]}" ) ;; - iku ) FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" ) ;; - kat) FONTS=( "${GEORGIAN_FONTS[@]}" ) ;; + test -z "$FONTS" && FONTS=( "${GREEK_FONTS[@]}" ) ;; + hye ) test -z "$FONTS" && FONTS=( "${ARMENIAN_FONTS[@]}" ) ;; + iku ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" ) ;; + kat) test -z "$FONTS" && FONTS=( "${GEORGIAN_FONTS[@]}" ) ;; kat_old) TEXT_CORPUS="${FLAGS_webtext_prefix}/kat.corpus.txt" - FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;; - kir ) FONTS=( "${KYRGYZ_FONTS[@]}" ) + test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;; + kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" ) TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;; - kur ) FONTS=( "${KURDISH_FONTS[@]}" ) ;; + kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;; *) err "Error: ${lang} is not a valid language code" esac @@ -1061,6 +1060,8 @@ set_lang_specific_parameters() { elif [[ ! -z ${MEAN_COUNT} ]]; then TRAINING_DATA_ARGUMENTS+=" --mean_count=${MEAN_COUNT}" fi + # Default to Latin fonts if none have been set + test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) } #============================================================================= diff --git a/training/tesstrain.sh b/training/tesstrain.sh index ecf2072083..c1af1e86c1 100755 --- a/training/tesstrain.sh +++ b/training/tesstrain.sh @@ -17,7 +17,6 @@ # USAGE: # # tesstrain.sh -# --bin_dir PATH # Location of training program. # --fontlist FONTS_STR # A plus-separated list of fontnames to train on. # --fonts_dir FONTS_PATH # Path to font files. 
# --lang LANG_CODE # ISO 639 code. @@ -25,6 +24,7 @@ # --output_dir OUTPUTDIR # Location of output traineddata file. # --overwrite # Safe to overwrite files in output_dir. # --run_shape_clustering # Run shape clustering (use for Indic langs). +# --exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1"). # # OPTIONAL flags for input data. If unspecified we will look for them in # the langdata_dir directory. @@ -49,11 +49,8 @@ source `dirname $0`/tesstrain_utils.sh ARGV=("$@") parse_flags -tlog "\n=== Starting training for language '${LANG_CODE}'" - -tlog "Cleaning workspace directory ${TRAINING_DIR}..." mkdir -p ${TRAINING_DIR} -rm -fr ${TRAINING_DIR}/* +tlog "\n=== Starting training for language '${LANG_CODE}'" source `dirname $0`/language-specific.sh set_lang_specific_parameters ${LANG_CODE} diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh index a3ad7f5142..30006bc1f7 100755 --- a/training/tesstrain_utils.sh +++ b/training/tesstrain_utils.sh @@ -16,10 +16,6 @@ # # USAGE: source tesstrain_utils.sh -FONTS=( - "Arial" \ - "Times New Roman," \ -) if [ "$(uname)" == "Darwin" ];then FONTS_DIR="/Library/Fonts/" else @@ -29,7 +25,8 @@ OUTPUT_DIR="/tmp/tesstrain/tessdata" OVERWRITE=0 RUN_SHAPE_CLUSTERING=0 EXTRACT_FONT_PROPERTIES=1 -WORKSPACE_DIR="/tmp/tesstrain" +WORKSPACE_DIR=`mktemp -d` +EXPOSURES=0 # Logging helper functions. tlog() { @@ -45,11 +42,11 @@ err_exit() { # if the program file is not found. # Usage: run_command CMD ARG1 ARG2... run_command() { - local cmd=$1 - shift - if [[ ! -x ${cmd} ]]; then - err_exit "File ${cmd} not found" + local cmd=`which $1` + if [[ -z ${cmd} ]]; then + err_exit "$1 not found" fi + shift tlog "[$(date)] ${cmd} $@" ${cmd} "$@" 2>&1 1>&2 | tee -a ${LOG_FILE} # check completion status @@ -69,22 +66,6 @@ check_file_readable() { done } -# Set global path variables that are based on parsed flags. 
-set_prog_paths() { - if [[ -z ${BINDIR} ]]; then - err_exit "Need to specify location of program files" - fi - CN_TRAINING_EXE=${BINDIR}/cntraining - COMBINE_TESSDATA_EXE=${BINDIR}/combine_tessdata - MF_TRAINING_EXE=${BINDIR}/mftraining - SET_UNICHARSET_PROPERTIES_EXE=${BINDIR}/set_unicharset_properties - SHAPE_TRAINING_EXE=${BINDIR}/shapeclustering - TESSERACT_EXE=${BINDIR}/tesseract - TEXT2IMAGE_EXE=${BINDIR}/text2image - UNICHARSET_EXTRACTOR_EXE=${BINDIR}/unicharset_extractor - WORDLIST2DAWG_EXE=${BINDIR}/wordlist2dawg -} - # Sets the named variable to given value. Aborts if the value is missing or # if it looks like a flag. # Usage: parse_value VAR_NAME VALUE @@ -109,9 +90,6 @@ parse_flags() { case ${ARGV[$i]} in --) break;; - --bin_dir) - parse_value "BINDIR" ${ARGV[$j]} - i=$j ;; --fontlist) # Expect a plus-separated list of names if [[ -z ${ARGV[$j]} ]] || [[ ${ARGV[$j]:0:2} == "--" ]]; then err_exit "Invalid value passed to --fontlist" @@ -121,6 +99,16 @@ parse_flags() { FONTS=( ${ARGV[$j]} ) IFS=$ofs i=$j ;; + --exposures) + exp="" + while test $j -lt ${#ARGV[@]}; do + test -z ${ARGV[$j]} && break + test `echo ${ARGV[$j]} | cut -c -2` = "--" && break + exp="$exp ${ARGV[$j]}" + j=$((j+1)) + done + parse_value "EXPOSURES" "$exp" + i=$((j-1)) ;; --fonts_dir) parse_value "FONTS_DIR" ${ARGV[$j]} i=$j ;; @@ -156,9 +144,6 @@ parse_flags() { if [[ -z ${LANG_CODE} ]]; then err_exit "Need to specify a language --lang" fi - if [[ -z ${BINDIR} ]]; then - err_exit "Need to specify path to built binaries --bin_dir" - fi if [[ -z ${LANGDATA_ROOT} ]]; then err_exit "Need to specify path to language files --langdata_dir" fi @@ -171,8 +156,6 @@ parse_flags() { fi fi - set_prog_paths - # Location where intermediate files will be created. TRAINING_DIR=${WORKSPACE_DIR}/${LANG_CODE} # Location of log file for the whole run. 
@@ -200,8 +183,8 @@ initialize_fontconfig() { export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX) local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt echo "Text" >${sample_path} - run_command ${TEXT2IMAGE_EXE} --fonts_dir=${FONTS_DIR} \ - --font="Arial" --outputbase=${sample_path} --text=${sample_path} \ + run_command text2image --fonts_dir=${FONTS_DIR} \ + --font="${FONTS[0]}" --outputbase=${sample_path} --text=${sample_path} \ --fontconfig_tmpdir=${FONT_CONFIG_CACHE} } @@ -228,14 +211,14 @@ generate_font_image() { fi done - run_command ${TEXT2IMAGE_EXE} ${common_args} --font="${font}" \ + run_command text2image ${common_args} --font="${font}" \ --text=${TRAINING_TEXT} ${TEXT2IMAGE_EXTRA_ARGS} check_file_readable ${outbase}.box ${outbase}.tif if (( ${EXTRACT_FONT_PROPERTIES} )) && [[ -r ${TRAIN_NGRAMS_FILE} ]]; then tlog "Extracting font properties of ${font}" - run_command ${TEXT2IMAGE_EXE} ${common_args} --font="${font}" \ + run_command text2image ${common_args} --font="${font}" \ --ligatures=false --text=${TRAIN_NGRAMS_FILE} \ --only_extract_font_properties --ptsize=32 check_file_readable ${outbase}.fontinfo @@ -254,35 +237,36 @@ phase_I_generate_image() { err_exit "Could not find training text file ${TRAINING_TEXT}" fi CHAR_SPACING="0.0" - EXPOSURE="0" - - if (( ${EXTRACT_FONT_PROPERTIES} )) && [[ -r ${BIGRAM_FREQS_FILE} ]]; then - # Parse .bigram_freqs file and compose a .train_ngrams file with text - # for tesseract to recognize during training. Take only the ngrams whose - # combined weight accounts for 95% of all the bigrams in the language. 
- NGRAM_FRAC=$(cat ${BIGRAM_FREQS_FILE} \ - | awk '{s=s+$2}; END {print (s/100)*p}' p=99) - cat ${BIGRAM_FREQS_FILE} | sort -rnk2 \ - | awk '{s=s+$2; if (s <= x) {printf "%s ", $1; } }' \ - x=${NGRAM_FRAC} > ${TRAIN_NGRAMS_FILE} - check_file_readable ${TRAIN_NGRAMS_FILE} - fi - local counter=0 - for font in "${FONTS[@]}"; do - generate_font_image "${font}" & - let counter=counter+1 - let rem=counter%par_factor - if [[ "${rem}" -eq 0 ]]; then - wait + for EXPOSURE in $EXPOSURES; do + if (( ${EXTRACT_FONT_PROPERTIES} )) && [[ -r ${BIGRAM_FREQS_FILE} ]]; then + # Parse .bigram_freqs file and compose a .train_ngrams file with text + # for tesseract to recognize during training. Take only the ngrams whose + # combined weight accounts for 95% of all the bigrams in the language. + NGRAM_FRAC=$(cat ${BIGRAM_FREQS_FILE} \ + | awk '{s=s+$2}; END {print (s/100)*p}' p=99) + cat ${BIGRAM_FREQS_FILE} | sort -rnk2 \ + | awk '{s=s+$2; if (s <= x) {printf "%s ", $1; } }' \ + x=${NGRAM_FRAC} > ${TRAIN_NGRAMS_FILE} + check_file_readable ${TRAIN_NGRAMS_FILE} fi - done - wait - # Check that each process was successful. - for font in "${FONTS[@]}"; do - local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g') - local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE} - check_file_readable ${outbase}.box ${outbase}.tif + + local counter=0 + for font in "${FONTS[@]}"; do + generate_font_image "${font}" & + let counter=counter+1 + let rem=counter%par_factor + if [[ "${rem}" -eq 0 ]]; then + wait + fi + done + wait + # Check that each process was successful. 
+ for font in "${FONTS[@]}"; do + local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g') + local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE} + check_file_readable ${outbase}.box ${outbase}.tif + done done } @@ -291,7 +275,7 @@ phase_UP_generate_unicharset() { tlog "\n=== Phase UP: Generating unicharset and unichar properties files ===" local box_files=$(ls ${TRAINING_DIR}/*.box) - run_command ${UNICHARSET_EXTRACTOR_EXE} -D "${TRAINING_DIR}/" ${box_files} + run_command unicharset_extractor -D "${TRAINING_DIR}/" ${box_files} local outfile=${TRAINING_DIR}/unicharset UNICHARSET_FILE="${TRAINING_DIR}/${LANG_CODE}.unicharset" check_file_readable ${outfile} @@ -299,7 +283,7 @@ phase_UP_generate_unicharset() { XHEIGHTS_FILE="${TRAINING_DIR}/${LANG_CODE}.xheights" check_file_readable ${UNICHARSET_FILE} - run_command ${SET_UNICHARSET_PROPERTIES_EXE} \ + run_command set_unicharset_properties \ -U ${UNICHARSET_FILE} -O ${UNICHARSET_FILE} -X ${XHEIGHTS_FILE} \ --script_dir=${LANGDATA_ROOT} check_file_readable ${XHEIGHTS_FILE} @@ -327,7 +311,7 @@ phase_D_generate_dawg() { if [[ -s ${WORDLIST_FILE} ]]; then tlog "Generating word Dawg" check_file_readable ${UNICHARSET_FILE} - run_command ${WORDLIST2DAWG_EXE} -r 1 ${WORDLIST_FILE} ${WORD_DAWG} \ + run_command wordlist2dawg -r 1 ${WORDLIST_FILE} ${WORD_DAWG} \ ${UNICHARSET_FILE} check_file_readable ${WORD_DAWG} @@ -339,13 +323,13 @@ phase_D_generate_dawg() { if [[ -s ${freq_wordlist_file} ]]; then check_file_readable ${UNICHARSET_FILE} tlog "Generating frequent-word Dawg" - run_command ${WORDLIST2DAWG_EXE} -r 1 ${freq_wordlist_file} \ + run_command wordlist2dawg -r 1 ${freq_wordlist_file} \ ${FREQ_DAWG} ${UNICHARSET_FILE} check_file_readable ${FREQ_DAWG} fi # Punctuation DAWG - # -r arguments to WORDLIST2DAWG_EXE denote RTL reverse policy + # -r arguments to wordlist2dawg denote RTL reverse policy # (see Trie::RTLReversePolicy enum in third_party/tesseract/dict/trie.h). 
# We specify 0/RRP_DO_NO_REVERSE when generating number DAWG, # 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS, @@ -360,20 +344,20 @@ phase_D_generate_dawg() { PUNC_FILE="${LANGDATA_ROOT}/common.punc" fi check_file_readable ${PUNC_FILE} - run_command ${WORDLIST2DAWG_EXE} -r ${punc_reverse_policy} \ + run_command wordlist2dawg -r ${punc_reverse_policy} \ ${PUNC_FILE} ${PUNC_DAWG} ${UNICHARSET_FILE} check_file_readable ${PUNC_DAWG} # Numbers DAWG if [[ -s ${NUMBERS_FILE} ]]; then - run_command ${WORDLIST2DAWG_EXE} -r 0 \ + run_command wordlist2dawg -r 0 \ ${NUMBERS_FILE} ${NUMBER_DAWG} ${UNICHARSET_FILE} check_file_readable ${NUMBER_DAWG} fi # Bigram dawg if [[ -s ${WORD_BIGRAMS_FILE} ]]; then - run_command ${WORDLIST2DAWG_EXE} -r 1 \ + run_command wordlist2dawg -r 1 \ ${WORD_BIGRAMS_FILE} ${BIGRAM_DAWG} ${UNICHARSET_FILE} check_file_readable ${BIGRAM_DAWG} fi @@ -387,10 +371,9 @@ phase_E_extract_features() { par_factor=1 fi tlog "\n=== Phase E: Extracting features ===" - TRAIN_EXPOSURES='0' local img_files="" - for exposure in ${TRAIN_EXPOSURES}; do + for exposure in ${EXPOSURES}; do img_files=${img_files}' '$(ls ${TRAINING_DIR}/*.exp${exposure}.tif) done @@ -405,7 +388,7 @@ phase_E_extract_features() { tlog "Using TESSDATA_PREFIX=${TESSDATA_PREFIX}" local counter=0 for img_file in ${img_files}; do - run_command ${TESSERACT_EXE} ${img_file} ${img_file%.*} \ + run_command tesseract ${img_file} ${img_file%.*} \ ${box_config} ${config} & let counter=counter+1 let rem=counter%par_factor @@ -427,7 +410,7 @@ phase_C_cluster_prototypes() { tlog "\n=== Phase C: Clustering feature prototypes (cnTraining) ===" local out_normproto=$1 - run_command ${CN_TRAINING_EXE} -D "${TRAINING_DIR}/" \ + run_command cntraining -D "${TRAINING_DIR}/" \ $(ls ${TRAINING_DIR}/*.tr) check_file_readable ${TRAINING_DIR}/normproto @@ -447,7 +430,7 @@ phase_S_cluster_shapes() { font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights" fi - run_command ${SHAPE_TRAINING_EXE} \ + 
run_command shapeclustering \ -D "${TRAINING_DIR}/" \ -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \ -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \ @@ -468,7 +451,7 @@ phase_M_cluster_microfeatures() { font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights" fi - run_command ${MF_TRAINING_EXE} \ + run_command mftraining \ -D "${TRAINING_DIR}/" \ -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \ -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \ @@ -528,7 +511,7 @@ make__traineddata() { fi # Compose the traineddata file. - run_command ${COMBINE_TESSDATA_EXE} ${TRAINING_DIR}/${LANG_CODE}. + run_command combine_tessdata ${TRAINING_DIR}/${LANG_CODE}. # Copy it to the output dir, overwriting only if allowed by the cmdline flag. if [[ ! -d ${OUTPUT_DIR} ]]; then diff --git a/viewer/svutil.cpp b/viewer/svutil.cpp index a820eafbc5..f94c1c86d5 100644 --- a/viewer/svutil.cpp +++ b/viewer/svutil.cpp @@ -127,7 +127,7 @@ SVSemaphore::SVSemaphore() { semaphore_ = CreateSemaphore(0, 0, 10, 0); #elif defined(__APPLE__) char name[50]; - snprintf(name, sizeof(name), "%d", random()); + snprintf(name, sizeof(name), "%ld", random()); sem_unlink(name); semaphore_ = sem_open(name, O_CREAT , S_IWUSR, 0); if (semaphore_ == SEM_FAILED) { @@ -296,14 +296,11 @@ static std::string ScrollViewCommand(std::string scrollview_path) { // this unnecessary. // Also the path has to be separated by ; on windows and : otherwise. 
#ifdef _WIN32 - const char* cmd_template = "-Djava.library.path=%s -cp %s/ScrollView.jar;" - "%s/piccolo2d-core-3.0.jar:%s/piccolo2d-extras-3.0.jar" - " com.google.scrollview.ScrollView"; + const char* cmd_template = "-Djava.library.path=%s -jar %s/ScrollView.jar"; + #else const char* cmd_template = "-c \"trap 'kill %%1' 0 1 2 ; java " - "-Xms1024m -Xmx2048m -Djava.library.path=%s -cp %s/ScrollView.jar:" - "%s/piccolo2d-core-3.0.jar:%s/piccolo2d-extras-3.0.jar" - " com.google.scrollview.ScrollView" + "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar" " & wait\""; #endif int cmdlen = strlen(cmd_template) + 4*strlen(scrollview_path.c_str()) + 1; @@ -374,7 +371,7 @@ static int GetAddrInfo(const char* hostname, int port, struct addrinfo** address) { #if defined(__linux__) char port_str[40]; - snprintf(port_str, 40, "%d", port); + snprintf(port_str, 40, "%ld", (long)port); return getaddrinfo(hostname, port_str, NULL, address); #else return GetAddrInfoNonLinux(hostname, port, address); diff --git a/wordrec/lm_state.h b/wordrec/lm_state.h index c87745b75a..623bbb5e7f 100644 --- a/wordrec/lm_state.h +++ b/wordrec/lm_state.h @@ -177,11 +177,11 @@ struct ViterbiStateEntry : public ELIST_LINK { /// the smallest rating or lower/upper case letters). LanguageModelFlagsType top_choice_flags; - /// Extra information maintained by Dawg laguage model component + /// Extra information maintained by Dawg language model component /// (owned by ViterbiStateEntry). LanguageModelDawgInfo *dawg_info; - /// Extra information maintained by Ngram laguage model component + /// Extra information maintained by Ngram language model component /// (owned by ViterbiStateEntry). 
LanguageModelNgramInfo *ngram_info; diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp index 1818478c66..04e340396e 100644 --- a/wordrec/pieces.cpp +++ b/wordrec/pieces.cpp @@ -273,7 +273,7 @@ void Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column, * * Recursively go through the ratings matrix to find lists of fragments * to be merged in the function merge_and_put_fragment_lists. - * current_frag is the postion of the piece we are looking for. + * current_frag is the position of the piece we are looking for. * current_row is the row in the rating matrix we are currently at. * start is the row we started initially, so that we can know where * to append the results to the matrix. num_frag_parts is the total diff --git a/wordrec/wordrec.h b/wordrec/wordrec.h index 38f09f23d2..fb54ccae08 100644 --- a/wordrec/wordrec.h +++ b/wordrec/wordrec.h @@ -375,7 +375,7 @@ class Wordrec : public Classify { inT16 num_blobs); // Recursively go through the ratings matrix to find lists of fragments // to be merged in the function merge_and_put_fragment_lists. - // current_frag is the postion of the piece we are looking for. + // current_frag is the position of the piece we are looking for. // current_row is the row in the rating matrix we are currently at. // start is the row we started initially, so that we can know where // to append the results to the matrix. num_frag_parts is the total