Skip to content

Commit

Permalink
Fixed issue 669
Browse files Browse the repository at this point in the history
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@743 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
theraysmith@gmail.com committed Sep 21, 2012
1 parent d71045f commit c7cef53
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 26 deletions.
48 changes: 28 additions & 20 deletions classify/shapetable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,17 @@
namespace tesseract {

// Writes to the given file. Returns false in case of error.
bool UnicharAndFonts::Serialize(FILE* fp) {
inT32 uni_id = unichar_id;
if (fwrite(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
bool UnicharAndFonts::Serialize(FILE* fp) const {
if (fwrite(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
if (!font_ids.Serialize(fp)) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) {
inT32 uni_id;
if (fread(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
if (fread(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
if (swap)
ReverseN(&uni_id, sizeof(uni_id));
unichar_id = uni_id;
ReverseN(&unichar_id, sizeof(unichar_id));
if (!font_ids.DeSerialize(swap, fp)) return false;
return true;
}
Expand All @@ -55,17 +52,20 @@ int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) {
}

// Writes to the given file. Returns false in case of error.
bool Shape::Serialize(FILE* fp) {
if (fwrite(&unichars_sorted_, 1, 1, fp) != 1)
bool Shape::Serialize(FILE* fp) const {
uinT8 sorted = unichars_sorted_;
if (fwrite(&sorted, sizeof(sorted), 1, fp) != 1)
return false;
if (!unichars_.SerializeClasses(fp)) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Shape::DeSerialize(bool swap, FILE* fp) {
if (fread(&unichars_sorted_, 1, 1, fp) != 1)
uinT8 sorted;
if (fread(&sorted, sizeof(sorted), 1, fp) != 1)
return false;
unichars_sorted_ = sorted != 0;
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
return true;
}
Expand Down Expand Up @@ -197,14 +197,26 @@ STRING ShapeTable::DebugStr(int shape_id) const {
const Shape& shape = GetShape(shape_id);
STRING result;
result.add_str_int("Shape", shape_id);
if (shape.size() > 100) {
result.add_str_int(" Num unichars=", shape.size());
return result;
}
for (int c = 0; c < shape.size(); ++c) {
result.add_str_int(" c_id=", shape[c].unichar_id);
result += "=";
result += unicharset_->id_to_unichar(shape[c].unichar_id);
result.add_str_int(", ", shape[c].font_ids.size());
result += " fonts =";
for (int f = 0; f < shape[c].font_ids.size(); ++f) {
result.add_str_int(" ", shape[c].font_ids[f]);
if (shape.size() < 10) {
result.add_str_int(", ", shape[c].font_ids.size());
result += " fonts =";
int num_fonts = shape[c].font_ids.size();
if (num_fonts > 10) {
result.add_str_int(" ", shape[c].font_ids[0]);
result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
} else {
for (int f = 0; f < num_fonts; ++f) {
result.add_str_int(" ", shape[c].font_ids[f]);
}
}
}
}
return result;
Expand Down Expand Up @@ -327,12 +339,12 @@ int ShapeTable::BuildFromShape(const Shape& shape,
}

// Returns true if the shapes are already merged.
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) {
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const {
return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
}

// Returns true if any shape contains multiple unichars.
bool ShapeTable::AnyMultipleUnichars() {
bool ShapeTable::AnyMultipleUnichars() const {
int num_shapes = NumShapes();
for (int s1 = 0; s1 < num_shapes; ++s1) {
if (MasterDestinationIndex(s1) != s1) continue;
Expand Down Expand Up @@ -408,10 +420,6 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
shape_table_[master_id2]->set_destination_index(master_id1);
// Add all the shapes of master_id2 to master_id1.
shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
tprintf("Merged shape %d->%d, %d->%d, now with %d unichars: %s\n",
shape_id1, master_id1, shape_id2, master_id2,
shape_table_[master_id1]->size(),
DebugStr(master_id1).string());
}

// Returns the destination of this shape, (if merged), taking into account
Expand Down
12 changes: 6 additions & 6 deletions classify/shapetable.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,16 @@ struct UnicharAndFonts {
}

// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp);
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);

// Sort function to sort a pair of UnicharAndFonts by unichar_id.
static int SortByUnicharId(const void* v1, const void* v2);

GenericVector<int> font_ids;
int unichar_id;
GenericVector<inT32> font_ids;
inT32 unichar_id;
};

// A Shape is a collection of unichar-ids and a list of fonts associated with
Expand All @@ -64,7 +64,7 @@ class Shape {
Shape() : destination_index_(-1) {}

// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp);
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
Expand Down Expand Up @@ -188,9 +188,9 @@ class ShapeTable {
int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes);

// Returns true if the shapes are already merged.
bool AlreadyMerged(int shape_id1, int shape_id2);
bool AlreadyMerged(int shape_id1, int shape_id2) const;
// Returns true if any shape contains multiple unichars.
bool AnyMultipleUnichars();
bool AnyMultipleUnichars() const;
// Returns the maximum number of unichars over all shapes.
int MaxNumUnichars() const;
// Merges shapes with a common unichar over the [start, end) interval.
Expand Down

0 comments on commit c7cef53

Please sign in to comment.