21 #pragma warning(disable:4244) // Conversion warnings
40 #include "config_auto.h"
49 "Force using vertical text page mode");
51 "find horizontal lines such as headers in vertical page mode");
53 "Fraction of textlines deemed vertical to use vertical page mode");
124 denorm_(
NULL), grid_box_(bleft, tright), rerotation_(1.0
f, 0.0
f) {
127 initial_widths_win_ =
NULL;
129 diacritics_win_ =
NULL;
130 textlines_win_ =
NULL;
131 smoothed_win_ =
NULL;
135 if (widths_win_ !=
NULL) {
136 #ifndef GRAPHICS_DISABLED
138 #endif // GRAPHICS_DISABLED
144 delete initial_widths_win_;
146 delete textlines_win_;
147 delete smoothed_win_;
148 delete diacritics_win_;
157 BLOBNBOX_IT blob_it(&block->
blobs);
158 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
159 SetNeighbours(
false,
false, blob_it.data());
171 InsertBlobs(input_block);
173 while (cjk_merge && FixBrokenCJK(input_block));
175 FindTextlineFlowDirection(
false);
// Walks every blob in input_blobs and sorts it into one of three buckets
// based on the aspect ratio of its bounding box.
//   num_vertical_blobs / num_horizontal_blobs: running counters, incremented
//     in place (the caller may invoke this helper more than once and
//     accumulate into the same counters).
//   vertical_blobs / horizontal_blobs / nondescript_blobs: output lists that
//     receive (non-owning CLIST) references to blobs for which ok_blob holds.
// NOTE(review): the definitions of box, blob and ok_blob, and the exact
// thresholds that decide vertical vs. horizontal, are on lines not visible
// in this excerpt - confirm against the full file.
181 static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
182 int* num_vertical_blobs,
183 int* num_horizontal_blobs,
184 BLOBNBOX_CLIST* vertical_blobs,
185 BLOBNBOX_CLIST* horizontal_blobs,
186 BLOBNBOX_CLIST* nondescript_blobs) {
// One appending iterator per output list.
187 BLOBNBOX_C_IT v_it(vertical_blobs);
188 BLOBNBOX_C_IT h_it(horizontal_blobs);
189 BLOBNBOX_C_IT n_it(nondescript_blobs);
190 BLOBNBOX_IT blob_it(input_blobs);
191 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
// y_x is height/width of the blob box; x_y is its reciprocal.
// NOTE(review): a zero width or height makes one of these a float division
// by zero (inf) - presumably excluded by upstream filtering; verify.
194 float y_x =
static_cast<float>(box.
height()) / box.
width();
195 float x_y = 1.0f / y_x;
// ratio is the larger of the two, i.e. the elongation regardless of
// orientation.
197 float ratio = x_y > y_x ? x_y : y_x;
// The counters are bumped for every blob classified vertical/horizontal,
// but the blob is only appended to the output list when ok_blob is set.
201 ++*num_vertical_blobs;
202 if (ok_blob) v_it.add_after_then_move(blob);
204 ++*num_horizontal_blobs;
205 if (ok_blob) h_it.add_after_then_move(blob);
206 }
else if (ok_blob) {
// Neither clearly vertical nor clearly horizontal, but still an ok blob.
207 n_it.add_after_then_move(blob);
220 BLOBNBOX_CLIST* osd_blobs) {
224 int vertical_boxes = 0;
225 int horizontal_boxes = 0;
227 BLOBNBOX_CLIST vertical_blobs;
228 BLOBNBOX_CLIST horizontal_blobs;
229 BLOBNBOX_CLIST nondescript_blobs;
230 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes,
231 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
232 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes,
233 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
235 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
236 horizontal_boxes, vertical_boxes,
237 horizontal_blobs.length(), vertical_blobs.length(),
238 nondescript_blobs.length());
239 if (osd_blobs !=
NULL && vertical_boxes == 0 && horizontal_boxes == 0) {
241 BLOBNBOX_C_IT osd_it(osd_blobs);
242 osd_it.add_list_after(&nondescript_blobs);
245 int min_vert_boxes =
static_cast<int>((vertical_boxes + horizontal_boxes) *
247 if (vertical_boxes >= min_vert_boxes) {
248 if (osd_blobs !=
NULL) {
249 BLOBNBOX_C_IT osd_it(osd_blobs);
250 osd_it.add_list_after(&vertical_blobs);
254 if (osd_blobs !=
NULL) {
255 BLOBNBOX_C_IT osd_it(osd_blobs);
256 osd_it.add_list_after(&horizontal_blobs);
267 rerotation_.
set_x(rotation.
x());
268 rerotation_.
set_y(-rotation.
y());
276 ColPartition_LIST leader_parts;
277 FindLeadersAndMarkNoise(block, &leader_parts);
281 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
284 MarkLeaderNeighbours(part,
LR_LEFT);
285 MarkLeaderNeighbours(part,
LR_RIGHT);
306 TBOX search_box = box;
307 search_box.
pad(padding, padding);
314 rsearch.StartRectSearch(search_box);
315 while ((n = rsearch.NextRectSearch()) !=
NULL) {
316 if (n == bbox)
continue;
318 if (nbox.
height() > max_size) {
323 tprintf(
"Max neighbour size=%d for candidate line box at:", max_size);
327 #ifndef GRAPHICS_DISABLED
328 if (leaders_win_ !=
NULL) {
335 #endif // GRAPHICS_DISABLED
362 ColPartition_LIST* big_parts) {
363 nontext_map_ = nontext_pix;
364 projection_ = projection;
374 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
377 FindTextlineFlowDirection(
true);
389 FindInitialPartitions(rerotation, block, part_grid, big_parts);
// Debug helper: prints the bounding box of neighbour together with its
// horizontal, vertical and (presumably perimeter-based - the argument list is
// not visible in this excerpt) stroke-width estimates via tprintf.
395 static void PrintBoxWidths(
BLOBNBOX* neighbour) {
// All three widths are printed with one decimal place. The last field was
// previously "%1.f" (field width 1, precision 0), which rounded p-width to an
// integer and was inconsistent with the h-width/v-width fields.
397 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
410 FCOORD click(static_cast<float>(x), static_cast<float>(y));
414 PrintBoxWidths(neighbour);
425 tprintf(
"Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
426 "Good= %d %d %d %d\n",
449 void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK* block,
450 ColPartition_LIST* leader_parts) {
456 gsearch.StartFullSearch();
457 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
458 SetNeighbours(
true,
false, bbox);
460 ColPartition_IT part_it(leader_parts);
461 gsearch.StartFullSearch();
462 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
477 if (part->MarkAsLeaderIfMonospaced())
478 part_it.add_after_then_move(part);
484 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
488 BLOBNBOX_IT blob_it(&block->
blobs);
490 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
496 blob_it.add_to_end(small_it.extract());
503 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
506 small_it.add_to_end(noise_it.extract());
518 void StrokeWidth::InsertBlobs(
TO_BLOCK* block) {
526 void StrokeWidth::MarkLeaderNeighbours(
const ColPartition* part,
528 const TBOX& part_box = part->bounding_box();
533 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.
left()
537 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
NULL) {
541 int x_gap = blob_box.
x_gap(part_box);
544 }
else if (best_blob ==
NULL || x_gap < best_gap) {
549 if (best_blob !=
NULL) {
554 #ifndef GRAPHICS_DISABLED
555 if (leaders_win_ !=
NULL) {
561 #endif // GRAPHICS_DISABLED
// Returns the upper-quartile (0.75 quantile) of the values accumulated into
// sizes while iterating over blobs, rounded to the nearest integer.
// NOTE(review): the declaration of sizes, how gridsize is used, and any
// filter applied before a height is added are on lines not visible here.
// Note also that the caller in FixBrokenCJK stores this result in a variable
// named median_height even though the 0.75 quantile is what is returned.
566 static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST* blobs) {
568 BLOBNBOX_IT it(blobs);
569 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Each accepted height is added to the histogram with a count of 1.
574 sizes.add(height, 1);
// Adding 0.5 before the truncating cast rounds to nearest.
576 return static_cast<int>(sizes.ile(0.75
f) + 0.5);
584 bool StrokeWidth::FixBrokenCJK(
TO_BLOCK* block) {
585 BLOBNBOX_LIST* blobs = &block->
blobs;
586 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
590 BLOBNBOX_IT blob_it(blobs);
592 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
600 tprintf(
"Checking for Broken CJK (max size=%d):", max_size);
604 BLOBNBOX_CLIST overlapped_blobs;
605 AccumulateOverlaps(blob, debug, max_size, max_dist,
606 &bbox, &overlapped_blobs);
607 if (!overlapped_blobs.empty()) {
611 if (bbox.
width() > bbox.
height() * kCJKAspectRatio ||
614 tprintf(
"Bad final aspectratio:");
622 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
626 BLOBNBOX_C_IT n_it(&overlapped_blobs);
627 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
629 neighbour = n_it.data();
634 if (!n_it.cycled_list()) {
637 PrintBoxWidths(blob);
647 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
651 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
664 int num_remaining = 0;
665 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
// Decides whether the box nbox may be merged into bbox as part of repairing
// a broken CJK character.
//   bbox, nbox: the current box and the candidate neighbour box.
//   debug: enables tprintf tracing.
//   max_size: upper bound on both width and height of the merged box.
//   max_dist: upper bound on both the x and the y gap between the boxes.
//   x_gap, y_gap: outputs; always set to bbox.x_gap(nbox) / bbox.y_gap(nbox).
// NOTE(review): the construction of merged (presumably the union of bbox and
// nbox), the final comparison of old_ratio vs. new_ratio and the return
// statements are on lines not visible in this excerpt.
685 static bool AcceptableCJKMerge(
const TBOX& bbox,
const TBOX& nbox,
686 bool debug,
int max_size,
int max_dist,
687 int* x_gap,
int* y_gap) {
// The gaps are written to the out-params before any test, so callers can
// read them whatever the result.
688 *x_gap = bbox.
x_gap(nbox);
689 *y_gap = bbox.
y_gap(nbox);
693 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
// Candidate must be close in both directions and the merged box must stay
// within max_size in both dimensions.
696 if (*x_gap <= max_dist && *y_gap <= max_dist &&
697 merged.width() <= max_size && merged.height() <= max_size) {
// Aspect ratios are folded to be >= 1 so that tall and wide boxes are
// treated symmetrically.
699 double old_ratio =
static_cast<double>(bbox.
width()) / bbox.
height();
700 if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
701 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
702 if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
713 void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX* not_this,
bool debug,
714 int max_size,
int max_dist,
715 TBOX* bbox, BLOBNBOX_CLIST* blobs) {
724 int x = (bbox->
left() + bbox->
right()) / 2;
725 int y = (bbox->
bottom() + bbox->
top()) / 2;
730 while ((neighbour = radsearch.NextRadSearch()) !=
NULL) {
731 if (neighbour == not_this)
continue;
734 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
738 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
744 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
745 if (nearests[dir] ==
NULL)
continue;
747 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
748 max_dist, &x_gap, &y_gap)) {
751 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
756 nearests[dir] =
NULL;
760 }
else if (x_gap < 0 && x_gap <= y_gap) {
763 if (nearests[dir] ==
NULL ||
764 y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
765 nearests[dir] = neighbour;
767 }
else if (y_gap < 0 && y_gap <= x_gap) {
770 if (nearests[dir] ==
NULL ||
771 x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
772 nearests[dir] = neighbour;
781 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
782 if (nearests[dir] ==
NULL)
continue;
785 tprintf(
"Testing for overlap with:");
789 blobs->shallow_clear();
791 tprintf(
"Final box overlaps nearest\n");
// Runs a fixed sequence of full-grid passes over the blobs, each pass
// visiting every blob returned by gsearch:
//   1. SetNeighbours (not in leader mode; line trap enabled only when
//      display_if_debugging is true, as that flag is passed in the
//      activate_line_trap position).
//   2. SimplifyObviousNeighbours.
//   3. SetNeighbourFlows.
//   4. SmoothNeighbourTypes three times: once with reset_all=false, then
//      twice with reset_all=true.
// Debug windows are stored in initial_widths_win_ (after step 3) and
// widths_win_ (after step 4).
// NOTE(review): the declarations of gsearch/bbox and the conditions guarding
// the two DisplayGoodBlobs calls are on lines not visible in this excerpt.
804 void StrokeWidth::FindTextlineFlowDirection(
bool display_if_debugging) {
// Pass 1: find the neighbours of every blob.
808 gsearch.StartFullSearch();
809 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
810 SetNeighbours(
false, display_if_debugging, bbox);
// Pass 2: simplify neighbour relations that are unambiguous.
813 gsearch.StartFullSearch();
814 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
815 SimplifyObviousNeighbours(bbox);
// Pass 3: assign an initial flow direction from the neighbourhood.
818 gsearch.StartFullSearch();
819 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
820 SetNeighbourFlows(bbox);
824 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
// Smoothing pass 1 of 3: reset_all = false.
827 gsearch.StartFullSearch();
828 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
829 SmoothNeighbourTypes(bbox,
false);
// Smoothing pass 2 of 3: reset_all = true.
832 gsearch.StartFullSearch();
833 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
834 SmoothNeighbourTypes(bbox,
true);
// Smoothing pass 3 of 3: reset_all = true again.
837 gsearch.StartFullSearch();
838 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
839 SmoothNeighbourTypes(bbox,
true);
843 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
851 void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap,
853 int line_trap_count = 0;
854 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
856 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
858 if (line_trap_count > 0 && activate_line_trap) {
880 tprintf(
"FGN in dir %d for blob:", dir);
883 int top = blob_box.
top();
884 int bottom = blob_box.
bottom();
885 int left = blob_box.
left();
886 int right = blob_box.
right();
887 int width = right - left;
888 int height = top - bottom;
896 int line_trap_count = 0;
899 ? height / 2 : width / 2;
901 ? height / 3 : width / 3;
903 min_good_overlap = min_decent_overlap = 1;
905 int search_pad =
static_cast<int>(
909 TBOX search_box = blob_box;
922 search_box.
set_top(search_box.
top() + search_pad);
929 rectsearch.StartRectSearch(search_box);
931 double best_goodness = 0.0;
932 bool best_is_good =
false;
934 while ((neighbour = rectsearch.NextRectSearch()) !=
NULL) {
936 if (neighbour == blob)
938 int mid_x = (nbox.
left() + nbox.
right()) / 2;
939 if (mid_x < blob->left_rule() || mid_x > blob->
right_rule())
948 int n_width = nbox.
width();
949 int n_height = nbox.
height();
950 if (
MIN(n_width, n_height) > line_trap_min &&
951 MAX(n_width, n_height) < line_trap_max)
957 MAX(width, height)) &&
962 if (debug)
tprintf(
"Bad size\n");
976 perp_overlap = nbox.
width();
978 perp_overlap = overlap;
981 if (debug)
tprintf(
"On wrong side\n");
988 perp_overlap = nbox.
height();
990 perp_overlap = overlap;
993 if (debug)
tprintf(
"On wrong side\n");
998 if (-gap > overlap) {
999 if (debug)
tprintf(
"Overlaps wrong way\n");
1002 if (perp_overlap < min_decent_overlap) {
1003 if (debug)
tprintf(
"Doesn't overlap enough\n");
1008 bool is_good = overlap >= min_good_overlap && !bad_sizes &&
1015 if (gap < 1) gap = 1;
1016 double goodness = (1.0 + is_good) * overlap / gap;
1018 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
1019 goodness, best_goodness, is_good, overlap, gap);
1021 if (goodness > best_goodness) {
1022 best_neighbour = neighbour;
1023 best_goodness = goodness;
1024 best_is_good = is_good;
1028 return line_trap_count;
// Adds the direct neighbours of blob, one candidate per direction in
// [0, BND_COUNT), to the neighbours list. Null neighbours are skipped.
// Insertion keeps the list ordered by SortByBoxLeft; the `true` argument is
// presumably the "unique" flag that suppresses duplicates - TODO confirm
// against the CLIST add_sorted declaration.
// NOTE(review): the lookup that produces neighbour for each direction is on
// lines not visible in this excerpt.
1032 static void ListNeighbours(
const BLOBNBOX* blob,
1033 BLOBNBOX_CLIST* neighbours) {
1034 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1037 if (neighbour !=
NULL) {
1038 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
// Adds to neighbours the direct neighbours of blob and, for each non-null
// neighbour of blob in every direction, that neighbour's own direct
// neighbours (i.e. everything up to two steps away). All insertion goes
// through ListNeighbours.
// NOTE(review): the lookup that produces neighbour for each direction is on
// lines not visible in this excerpt.
1044 static void List2ndNeighbours(
const BLOBNBOX* blob,
1045 BLOBNBOX_CLIST* neighbours) {
// First-order neighbours of blob itself.
1046 ListNeighbours(blob, neighbours);
1047 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1050 if (neighbour !=
NULL) {
// First-order neighbours of each neighbour.
1051 ListNeighbours(neighbour, neighbours);
// Adds to neighbours everything List2ndNeighbours finds for blob and, for
// each non-null neighbour of blob in every direction, everything
// List2ndNeighbours finds for that neighbour (i.e. up to three steps away).
// NOTE(review): the lookup that produces neighbour for each direction is on
// lines not visible in this excerpt.
1057 static void List3rdNeighbours(
const BLOBNBOX* blob,
1058 BLOBNBOX_CLIST* neighbours) {
// Up to second-order neighbours of blob itself.
1059 List2ndNeighbours(blob, neighbours);
1060 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1063 if (neighbour !=
NULL) {
// Up to second-order neighbours of each neighbour.
1064 List2ndNeighbours(neighbour, neighbours);
1071 static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST* neighbours,
1072 int* pure_h_count,
int* pure_v_count) {
1075 BLOBNBOX_C_IT it(neighbours);
1076 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1078 int h_min, h_max, v_min, v_max;
1081 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1082 if (h_max < v_min ||
1086 if (debug)
tprintf(
"Horz at:");
1087 }
else if (v_max < h_min) {
1090 if (debug)
tprintf(
"Vert at:");
1092 if (debug)
tprintf(
"Neither at:");
1102 void StrokeWidth::SetNeighbourFlows(
BLOBNBOX* blob) {
1108 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:",
1112 BLOBNBOX_CLIST neighbours;
1113 List3rdNeighbours(blob, &neighbours);
1115 int pure_h_count = 0;
1116 int pure_v_count = 0;
1117 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1121 tprintf(
"SetFlows: h_count=%d, v_count=%d\n",
1122 pure_h_count, pure_v_count);
1124 if (!neighbours.empty()) {
1127 if (pure_h_count > 2 * pure_v_count) {
1130 }
else if (pure_v_count > 2 * pure_h_count) {
1143 static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
1144 int* pure_h_count,
int* pure_v_count) {
1145 BLOBNBOX_C_IT it(neighbours);
1146 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1158 void StrokeWidth::SimplifyObviousNeighbours(
BLOBNBOX* blob) {
1180 int h_min, h_max, v_min, v_max;
1182 if ((h_max + margin < v_min && h_max < margin / 2) ||
1187 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1197 void StrokeWidth::SmoothNeighbourTypes(
BLOBNBOX* blob,
bool reset_all) {
1200 BLOBNBOX_CLIST neighbours;
1201 List2ndNeighbours(blob, &neighbours);
1203 int pure_h_count = 0;
1204 int pure_v_count = 0;
1205 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1210 tprintf(
"pure_h=%d, pure_v=%d\n",
1211 pure_h_count, pure_v_count);
1213 if (pure_h_count > pure_v_count) {
1217 }
else if (pure_v_count > pure_h_count) {
1226 tprintf(
"Clean on pass 3!\n");
1235 void StrokeWidth::FindInitialPartitions(
const FCOORD& rerotation,
1237 ColPartitionGrid* part_grid,
1238 ColPartition_LIST* big_parts) {
1239 FindVerticalTextChains(part_grid);
1240 FindHorizontalTextChains(part_grid);
1242 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1243 part_grid->DisplayBoxes(chains_win_);
1246 part_grid->SplitOverlappingPartitions(big_parts);
1247 EasyMerges(part_grid);
1248 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1250 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1253 grid_box, rerotation));
1254 TestDiacritics(part_grid, block);
1255 MergeDiacritics(block, part_grid);
1257 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1258 part_grid->DisplayBoxes(textlines_win_);
1259 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1261 PartitionRemainingBlobs(part_grid);
1262 part_grid->SplitOverlappingPartitions(big_parts);
1263 EasyMerges(part_grid);
1264 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1267 grid_box, rerotation));
1270 grid_box, rerotation));
1272 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1273 part_grid->DisplayBoxes(smoothed_win_);
// Scans the whole grid and, for each seed blob bbox, follows the chain of
// mutual unused vertical neighbours first upwards (BND_ABOVE) and then
// downwards (BND_BELOW), finishing the resulting partition with
// CompletePartition(part, part_grid).
// NOTE(review): the seed-selection condition, the creation of part and the
// statements that add each blob to part are on lines not visible in this
// excerpt.
1293 void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
1296 gsearch.StartFullSearch();
1297 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
// Walk upwards from the seed until the chain ends.
1306 while (blob !=
NULL) {
1308 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
// Restart at the seed (bbox, not blob) and walk downwards.
1310 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1311 while (blob !=
NULL) {
1313 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1315 CompletePartition(part, part_grid);
// Scans the whole grid and, for each seed blob bbox, follows the chain of
// mutual unused horizontal neighbours first to the right (BND_RIGHT) and
// then to the left (BND_LEFT), finishing the resulting partition with
// CompletePartition(part, part_grid).
// NOTE(review): the seed-selection condition, the creation of part and the
// statements that add each blob to part are on lines not visible in this
// excerpt.
1336 void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
1339 gsearch.StartFullSearch();
1340 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
// Walk rightwards from the seed until the chain ends.
1347 while (blob !=
NULL) {
1349 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
// Restart at the seed (bbox, not blob) and walk leftwards.
1351 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1352 while (blob !=
NULL) {
// Continue the leftward walk with the horizontal-neighbour helper. This
// step previously called MutualUnusedVNeighbour, the vertical helper, with
// BND_LEFT, unlike every other step in this function.
1354 blob = MutualUnusedHNeighbour(blob,
BND_LEFT);
1356 CompletePartition(part, part_grid);
1368 void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid,
TO_BLOCK* block) {
1371 small_grid.InsertBlobList(&block->
blobs);
1372 int medium_diacritics = 0;
1373 int small_diacritics = 0;
1375 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1378 DiacriticBlob(&small_grid, blob)) {
1382 BLOBNBOX_IT blob_it(&block->
blobs);
1383 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1386 small_it.add_to_end(blob_it.extract());
1389 ColPartition* part = blob->
owner();
1390 if (part ==
NULL && DiacriticBlob(&small_grid, blob)) {
1391 ++medium_diacritics;
1393 small_it.add_to_end(blob_it.extract());
1394 }
else if (part !=
NULL && !part->block_owned() &&
1395 part->boxes_count() < 3) {
1401 BLOBNBOX_C_IT box_it(part->boxes());
1402 for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
1403 DiacriticBlob(&small_grid, box_it.data());
1405 if (box_it.cycled_list()) {
1407 while (!box_it.empty()) {
1416 ++medium_diacritics;
1423 small_it.add_to_end(blob_it.extract());
1424 part_grid->RemoveBBox(part);
1429 tprintf(
"Blob not available to be a diacritic at:");
1434 tprintf(
"Found %d small diacritics, %d medium\n",
1435 small_diacritics, medium_diacritics);
1445 bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid,
BLOBNBOX* blob) {
1451 small_box.bottom());
1453 tprintf(
"Testing blob for diacriticness at:");
1456 int x = (small_box.left() + small_box.right()) / 2;
1457 int y = (small_box.bottom() + small_box.top()) / 2;
1460 int height = small_box.height();
1475 int best_total_dist = 0;
1479 TBOX search_box(small_box);
1482 search_box.
pad(x_pad, y_pad);
1484 rsearch.SetUniqueMode(
true);
1486 rsearch.StartRectSearch(search_box);
1488 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1490 neighbour == blob || neighbour->
owner() == blob->
owner())
1497 tprintf(
"Neighbour not strong enough:");
1502 if (nbox.
height() < min_height) {
1504 tprintf(
"Neighbour not big enough:");
1509 int x_gap = small_box.x_gap(nbox);
1510 int y_gap = small_box.y_gap(nbox);
1514 if (debug)
tprintf(
"xgap=%d, y=%d, total dist=%d\n",
1515 x_gap, y_gap, total_distance);
1516 if (total_distance >
1519 tprintf(
"Neighbour with median size %d too far away:",
1527 tprintf(
"Computing reduced box for :");
1530 int left = small_box.left() - small_box.width();
1531 int right = small_box.right() + small_box.width();
1533 y_gap = small_box.
y_gap(nbox);
1534 if (best_x_overlap ==
NULL || y_gap < best_y_gap) {
1535 best_x_overlap = neighbour;
1543 tprintf(
"Shrunken box doesn't win:");
1547 if (best_y_overlap ==
NULL || total_distance < best_total_dist) {
1549 tprintf(
"New best y overlap:");
1552 best_y_overlap = neighbour;
1553 best_total_dist = total_distance;
1555 tprintf(
"New y overlap box doesn't win:");
1559 tprintf(
"Neighbour wrong side of a tab:");
1563 if (best_x_overlap !=
NULL &&
1564 (best_y_overlap ==
NULL ||
1569 tprintf(
"DiacriticBlob OK! (x-overlap:");
1575 if (best_y_overlap !=
NULL &&
1576 DiacriticXGapFilled(small_grid, small_box,
1578 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box())) {
1582 tprintf(
"DiacriticBlob OK! (y-overlap:");
1589 tprintf(
"DiacriticBlob fails:");
1591 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1592 if (best_y_overlap !=
NULL) {
1593 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1594 DiacriticXGapFilled(small_grid, small_box,
1596 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box()));
1615 bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
1616 const TBOX& diacritic_box,
1617 const TBOX& base_box) {
1621 TBOX occupied_box(base_box);
1623 while ((diacritic_gap = diacritic_box.
x_gap(occupied_box)) > max_gap) {
1624 TBOX search_box(occupied_box);
1625 if (diacritic_box.
left() > search_box.
right()) {
1635 rsearch.StartRectSearch(search_box);
1637 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1639 if (nbox.
x_gap(diacritic_box) < diacritic_gap) {
1640 if (nbox.
left() < occupied_box.left())
1642 if (nbox.
right() > occupied_box.right())
1643 occupied_box.set_right(nbox.
right());
1647 if (neighbour ==
NULL)
1654 void StrokeWidth::MergeDiacritics(
TO_BLOCK* block,
1655 ColPartitionGrid* part_grid) {
1657 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1663 if (part !=
NULL && !part->block_owned() && blob->
owner() ==
NULL &&
1667 part_grid->RemoveBBox(part);
1672 part_grid->InsertBBox(
true,
true, part);
1683 void StrokeWidth::RemoveLargeUnusedBlobs(
TO_BLOCK* block,
1684 ColPartitionGrid* part_grid,
1685 ColPartition_LIST* big_parts) {
1687 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1689 ColPartition* big_part = blob->
owner();
1690 if (big_part ==
NULL) {
1700 void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
1703 int prev_grid_x = -1;
1704 int prev_grid_y = -1;
1705 BLOBNBOX_CLIST cell_list;
1706 BLOBNBOX_C_IT cell_it(&cell_list);
1707 bool cell_all_noise =
true;
1708 gsearch.StartFullSearch();
1709 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1710 int grid_x = gsearch.GridX();
1711 int grid_y = gsearch.GridY();
1712 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1714 MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
1715 cell_it.set_to_list(&cell_list);
1716 prev_grid_x = grid_x;
1717 prev_grid_y = grid_y;
1718 cell_all_noise =
true;
1721 cell_it.add_to_end(bbox);
1723 cell_all_noise =
false;
1725 cell_all_noise =
false;
1728 MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
1733 void StrokeWidth::MakePartitionsFromCellList(
bool combine,
1734 ColPartitionGrid* part_grid,
1735 BLOBNBOX_CLIST* cell_list) {
1736 if (cell_list->empty())
1738 BLOBNBOX_C_IT cell_it(cell_list);
1740 BLOBNBOX* bbox = cell_it.extract();
1744 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1745 part->AddBox(cell_it.extract());
1747 CompletePartition(part, part_grid);
1749 for (; !cell_it.empty(); cell_it.forward()) {
1750 BLOBNBOX* bbox = cell_it.extract();
1754 CompletePartition(part, part_grid);
// Finalizes a freshly built partition and hands it to the grid:
// recomputes the partition's limits, sets its region and flow types from a
// projection-derived value, then inserts it into part_grid.
// NOTE(review): the computation of value is on lines not visible in this
// excerpt.
1761 void StrokeWidth::CompletePartition(ColPartition* part,
1762 ColPartitionGrid* part_grid) {
1763 part->ComputeLimits();
1768 part->SetRegionAndFlowTypesFromProjectionValue(value);
// NOTE(review): the two `true` flags are presumably the horizontal/vertical
// spread options of the grid's InsertBBox - confirm against its declaration.
1770 part_grid->InsertBBox(
true,
true, part);
1775 void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
1784 bool StrokeWidth::OrientationSearchBox(ColPartition* part,
TBOX* box) {
1785 if (part->IsVerticalType()) {
// Runs a series of veto checks on a proposed merge of partitions p1 and p2
// and, if none fires, returns whether NoNoiseInBetween holds for their
// bounding boxes.
// NOTE(review): the statements executed when each `if` below is true are on
// lines not visible in this excerpt; presumably each returns false to reject
// the merge - confirm against the full file.
1796 bool StrokeWidth::ConfirmEasyMerge(
const ColPartition* p1,
1797 const ColPartition* p2) {
// Check 1: at least one partition is vertical, they have no horizontal core
// overlap, and either neither is a singleton or their boxes lack major
// overlap.
1803 if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
1804 p1->HCoreOverlap(*p2) <= 0 &&
1805 ((!p1->IsSingleton() &&
1806 !p2->IsSingleton()) ||
1807 !p1->bounding_box().major_overlap(p2->bounding_box())))
// Check 2: the horizontal counterpart, with one extra escape hatch: a pair
// without major overlap is still allowed if either direction passes
// OKDiacriticMerge.
1809 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
1810 p1->VCoreOverlap(*p2) <= 0 &&
1811 ((!p1->IsSingleton() &&
1812 !p2->IsSingleton()) ||
1813 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1814 !p1->OKDiacriticMerge(*p2,
false) &&
1815 !p2->OKDiacriticMerge(*p1,
false))))
// Check 3: the merge must not violate a tab constraint.
1817 if (!p1->ConfirmNoTabViolation(*p2))
// Final gate: the result is whatever NoNoiseInBetween reports for the two
// bounding boxes.
1821 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
1825 bool StrokeWidth::NoNoiseInBetween(
const TBOX& box1,
const TBOX& box2)
const {
1833 ScrollView* StrokeWidth::DisplayGoodBlobs(
const char* window_name,
1836 #ifndef GRAPHICS_DISABLED
1843 gsearch.StartFullSearch();
1845 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1847 int left_x = box.
left();
1848 int right_x = box.
right();
1849 int top_y = box.
top();
1850 int bottom_y = box.
bottom();
1861 else if (goodness == 1)
1867 window->
Rectangle(left_x, bottom_y, right_x, top_y);
1878 int x = (blob_box.
left() + blob_box.
right()) / 2;
1879 #ifndef GRAPHICS_DISABLED
1880 window->
Line(x, top, x, bottom);
1881 #endif // GRAPHICS_DISABLED
1885 ScrollView* StrokeWidth::DisplayDiacritics(
const char* window_name,
1888 #ifndef GRAPHICS_DISABLED
1893 BLOBNBOX_IT it(&block->
blobs);
1894 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1897 window->
Pen(ScrollView::GREEN);
1898 DrawDiacriticJoiner(blob, window);
1906 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1909 window->
Pen(ScrollView::GREEN);
1910 DrawDiacriticJoiner(blob, window);