diff --git a/api/baseapi.cpp b/api/baseapi.cpp
index cd6985da8e..f59363f939 100644
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -1379,14 +1379,14 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it,
 }
 
 static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) {
-  unsigned long bufsize = base.length() + 2 * kMaxIntSize;
-  char id_buffer[bufsize];
+  const unsigned long BUFSIZE = 64;
+  char id_buffer[BUFSIZE];
   if (num2 >= 0) {
-    snprintf(id_buffer, bufsize - 1, "%s_%d_%d", base.c_str(), num1, num2);
+    snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
   } else {
-    snprintf(id_buffer, bufsize - 1, "%s_%d", base.c_str(), num1);
+    snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
   }
-  id_buffer[bufsize - 1] = '\0';
+  id_buffer[BUFSIZE - 1] = '\0';
   *hocr_str += " id='";
   *hocr_str += id_buffer;
   *hocr_str += "'";
@@ -1444,6 +1444,8 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
 
   int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
   int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
+  bool para_is_ltr = true; // Default direction is LTR
+  const char* paragraph_lang = NULL;
   bool font_info = false;
   GetBoolVariable("hocr_font_info", &font_info);
 
@@ -1493,18 +1495,24 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
 
     // Open any new block/paragraph/textline.
     if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+      para_is_ltr = true; // reset to default direction
       hocr_str += "   <div class='ocr_carea'";
       AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
       AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
     }
     if (res_it->IsAtBeginningOf(RIL_PARA)) {
       hocr_str += "\n    <p class='ocr_par'";
-      if (res_it->ParagraphIsLtr()) {
-        hocr_str += " dir='ltr'";
-      } else {
+      para_is_ltr = res_it->ParagraphIsLtr();
+      if (!para_is_ltr) {
         hocr_str += " dir='rtl'";
       }
       AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
+      paragraph_lang = res_it->WordRecognitionLanguage();
+      if (paragraph_lang) {
+          hocr_str += " lang='";
+          hocr_str += paragraph_lang;
+          hocr_str += "'";
+      }
       AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
     }
     if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
@@ -1537,14 +1545,18 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
       hocr_str.add_str_int("; x_fsize ", pointsize);
     }
     hocr_str += "'";
-    if (res_it->WordRecognitionLanguage()) {
+    const char* lang = res_it->WordRecognitionLanguage();
+    if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
       hocr_str += " lang='";
-      hocr_str += res_it->WordRecognitionLanguage();
+      hocr_str += lang;
       hocr_str += "'";
     }
     switch (res_it->WordDirection()) {
-      case DIR_LEFT_TO_RIGHT: hocr_str += " dir='ltr'"; break;
-      case DIR_RIGHT_TO_LEFT: hocr_str += " dir='rtl'"; break;
+      // Only emit direction if different from current paragraph direction
+      case DIR_LEFT_TO_RIGHT: if (!para_is_ltr) hocr_str += " dir='ltr'"; break;
+      case DIR_RIGHT_TO_LEFT: if (para_is_ltr) hocr_str += " dir='rtl'"; break;
+      case DIR_MIX:
+      case DIR_NEUTRAL:
       default:  // Do nothing.
         break;
     }
@@ -1574,6 +1586,7 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
     if (last_word_in_para) {
       hocr_str += "\n    </p>\n";
       pcnt++;
+      para_is_ltr = true; // back to default direction
     }
     if (last_word_in_block) {
       hocr_str += "   </div>\n";