Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
blobbox.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: blobbox.cpp (Formerly blobnbox.c)
3  * Description: Code for the textord blob class.
4  * Author: Ray Smith
5  * Created: Thu Jul 30 09:08:51 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "mfcpch.h"
26 #include "blobbox.h"
27 #include "helpers.h"
28 
29 #define PROJECTION_MARGIN 10 //arbitrary
30 #define EXTERN
31 
33 
34 // Up to 30 degrees is allowed for rotations of diacritic blobs.
35 const double kCosSmallAngle = 0.866;
36 // Min aspect ratio for a joined word to indicate an obvious flow direction.
37 const double kDefiniteAspectRatio = 2.0;
38 // Multiple of short length in perimeter to make a joined word.
39 const double kComplexShapePerimeterRatio = 1.5;
40 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
41 const double kMinMediumSizeRatio = 0.25;
42 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
43 const double kMaxMediumSizeRatio = 4.0;
44 
45 // Rotates the box and the underlying blob.
46 void BLOBNBOX::rotate(FCOORD rotation) {
47  cblob_ptr->rotate(rotation);
48  rotate_box(rotation);
49  compute_bounding_box();
50 }
51 
52 // Reflect the box in the y-axis, leaving the underlying blob untouched.
// NOTE(review): the signature line of this function (listing line 53) is
// absent from this extracted listing; only the body is visible.
// Mirrors the box horizontally: the new left edge is the negated old right
// edge and vice versa. The old right edge is saved first because set_right()
// overwrites it before set_left() runs.
54  int left = -box.right();
55  box.set_right(-box.left());
56  box.set_left(left);
57 }
58 
59 // Rotates the box by the angle given by rotation.
60 // If the blob is a diacritic, then only small rotations for skew
61 // correction can be applied.
62 void BLOBNBOX::rotate_box(FCOORD rotation) {
63  if (IsDiacritic()) {
64  ASSERT_HOST(rotation.x() >= kCosSmallAngle)
65  ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
66  ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
67  top_pt.rotate(rotation);
68  base_char_top_ = top_pt.y();
69  bottom_pt.rotate(rotation);
70  base_char_bottom_ = bottom_pt.y();
71  box.rotate(rotation);
72  } else {
73  box.rotate(rotation);
74  set_diacritic_box(box);
75  }
76 }
77 
78 /**********************************************************************
79  * BLOBNBOX::merge
80  *
81  * Merge this blob with the given blob, which should be after this.
82  **********************************************************************/
83 void BLOBNBOX::merge( //merge blobs
84  BLOBNBOX *nextblob //blob to join with
85  ) {
86  box += nextblob->box; //merge boxes
87  set_diacritic_box(box);
88  nextblob->joined = TRUE;
89 }
90 
91 
92 // Merge this with other, taking the outlines from other.
93 // Other is not deleted, but left for the caller to handle.
// NOTE(review): the signature line (listing line 94) and one statement
// (listing line 99) are absent from this extracted listing.
// When both blobs have outlines, the other blob's outline list is spliced
// into this blob's outline list via the list iterator.
95  if (cblob_ptr != NULL && other->cblob_ptr != NULL) {
96  C_OUTLINE_IT ol_it(cblob_ptr->out_list());
97  ol_it.add_list_after(other->cblob_ptr->out_list());
98  }
100 }
101 
102 
103 /**********************************************************************
104  * BLOBNBOX::chop
105  *
106  * Chop this blob into equal sized pieces using the x height as a guide.
107  * The blob is not actually chopped. Instead, fake blobs are inserted
108  * with the relevant bounding boxes.
109  **********************************************************************/
110 
111 void BLOBNBOX::chop( //chop blobs
112  BLOBNBOX_IT *start_it, //location of this
113  BLOBNBOX_IT *end_it, //iterator
114  FCOORD rotation, //for landscape
115  float xheight //of line
116  ) {
117  inT16 blobcount; //no of blobs
118  BLOBNBOX *newblob; //fake blob
119  BLOBNBOX *blob; //current blob
120  inT16 blobindex; //number of chop
121  inT16 leftx; //left edge of blob
122  float blobwidth; //width of each
123  float rightx; //right edge to scan
124  float ymin, ymax; //limits of new blob
125  float test_ymin, test_ymax; //limits of part blob
126  ICOORD bl, tr; //corners of box
127  BLOBNBOX_IT blob_it; //blob iterator
128 
129  //get no of chops
130  blobcount = (inT16) floor (box.width () / xheight);
131  if (blobcount > 1 && cblob_ptr != NULL) {
132  //width of each
133  blobwidth = (float) (box.width () + 1) / blobcount;
134  for (blobindex = blobcount - 1, rightx = box.right ();
135  blobindex >= 0; blobindex--, rightx -= blobwidth) {
136  ymin = (float) MAX_INT32;
137  ymax = (float) -MAX_INT32;
138  blob_it = *start_it;
139  do {
140  blob = blob_it.data ();
141  find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth,
142  rightx,
143  /*rotation, */ test_ymin, test_ymax);
144  blob_it.forward ();
145  UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
146  }
147  while (blob != end_it->data ());
148  if (ymin < ymax) {
149  leftx = (inT16) floor (rightx - blobwidth);
150  if (leftx < box.left ())
151  leftx = box.left (); //clip to real box
152  bl = ICOORD (leftx, (inT16) floor (ymin));
153  tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
154  if (blobindex == 0)
155  box = TBOX (bl, tr); //change box
156  else {
157  newblob = new BLOBNBOX;
158  //box is all it has
159  newblob->box = TBOX (bl, tr);
160  //stay on current
161  newblob->base_char_top_ = tr.y();
162  newblob->base_char_bottom_ = bl.y();
163  end_it->add_after_stay_put (newblob);
164  }
165  }
166  }
167  }
168 }
169 
170 // Returns the box gaps between this and its neighbours_ in an array
171 // indexed by BlobNeighbourDir.
172 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
173  for (int dir = 0; dir < BND_COUNT; ++dir) {
174  gaps[dir] = MAX_INT16;
175  BLOBNBOX* neighbour = neighbours_[dir];
176  if (neighbour != NULL) {
177  TBOX n_box = neighbour->bounding_box();
178  if (dir == BND_LEFT || dir == BND_RIGHT) {
179  gaps[dir] = box.x_gap(n_box);
180  } else {
181  gaps[dir] = box.y_gap(n_box);
182  }
183  }
184  }
185 }
186 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
187 // modified so that if the max exceeds the max dimension of the blob, and
188 // the min is less, the max is replaced with the min.
189 // The objective is to catch cases where there is only a single neighbour
190 // and avoid reporting the other gap as a ridiculously large number
191 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
192  int* v_min, int* v_max) const {
193  int max_dimension = MAX(box.width(), box.height());
194  int gaps[BND_COUNT];
195  NeighbourGaps(gaps);
196  *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]);
197  *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]);
198  if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min;
199  *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]);
200  *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]);
201  if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
202 }
203 
204 // NULLs out any neighbours that are DeletableNoise to remove references.
// NOTE(review): the signature line (listing line 205) is absent from this
// extracted listing; only the body is visible.
// Walks every neighbour direction; a neighbour reporting DeletableNoise() has
// its pointer cleared and its good-stroke flag reset.
206  for (int dir = 0; dir < BND_COUNT; ++dir) {
207  BLOBNBOX* neighbour = neighbours_[dir];
208  if (neighbour != NULL && neighbour->DeletableNoise()) {
209  neighbours_[dir] = NULL;
210  good_stroke_neighbours_[dir] = false;
211  }
212  }
213 }
214 
215 // Returns positive if there is at least one side neighbour that has a similar
216 // stroke width and is not on the other side of a rule line.
// NOTE(review): the signature line (listing line 217) is absent from this
// extracted listing; only the body is visible.
// The value returned is the number of directions for which
// good_stroke_neighbour() is true, so it is positive when at least one is.
218  int score = 0;
219  for (int dir = 0; dir < BND_COUNT; ++dir) {
220  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
221  if (good_stroke_neighbour(bnd))
222  ++score;
223  }
224  return score;
225 }
226 
227 // Returns the number of side neighbours that are of type BRT_NOISE.
// NOTE(review): the signature line (listing line 228) is absent from this
// extracted listing; only the body is visible.
// Counts the directions whose neighbour exists and has region type BRT_NOISE.
229  int count = 0;
230  for (int dir = 0; dir < BND_COUNT; ++dir) {
231  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
232  BLOBNBOX* blob = neighbour(bnd);
233  if (blob != NULL && blob->region_type() == BRT_NOISE)
234  ++count;
235  }
236  return count;
237 }
238 
239 // Returns true, and sets vert_possible/horz_possible if the blob has some
240 // feature that makes it individually appear to flow one way.
241 // eg if it has a high aspect ratio, yet has a complex shape, such as a
242 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
// NOTE(review): the signature line (listing line 243) is absent from this
// extracted listing; only the body is visible.
// Two symmetric tests follow: a wide box (width > kDefiniteAspectRatio *
// height) and a tall box. In each, the outline perimeter has the perimeter of
// a plain stroke of that length subtracted; if what remains exceeds
// kComplexShapePerimeterRatio * box perimeter, the flow direction is set.
// NOTE(review): cblob() is dereferenced without a NULL check and the fallback
// stroke estimate divides by the perimeter — presumably callers only pass
// blobs with real outlines; confirm.
244  int box_perimeter = 2 * (box.height() + box.width());
245  if (box.width() > box.height() * kDefiniteAspectRatio) {
246  // Attempt to distinguish a wide joined word from a dash.
247  // If it is a dash, then its perimeter is approximately
248  // 2 * (box width + stroke width), but more if the outline is noisy,
249  // so perimeter - 2*(box width + stroke width) should be close to zero.
250  // A complex shape such as a joined word should have a much larger value.
251  int perimeter = cblob()->perimeter();
252  if (vert_stroke_width() > 0)
253  perimeter -= 2 * vert_stroke_width();
254  else
255  perimeter -= 4 * cblob()->area() / perimeter;
256  perimeter -= 2 * box.width();
257  // Use a multiple of the box perimeter as a threshold.
258  if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
259  set_vert_possible(false);
260  set_horz_possible(true);
261  return true;
262  }
263  }
264  if (box.height() > box.width() * kDefiniteAspectRatio) {
265  // As above, but for a putative vertical word vs a I/1/l.
266  int perimeter = cblob()->perimeter();
267  if (horz_stroke_width() > 0)
268  perimeter -= 2 * horz_stroke_width();
269  else
270  perimeter -= 4 * cblob()->area() / perimeter;
271  perimeter -= 2 * box.height();
272  if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
273  set_vert_possible(true);
274  set_horz_possible(false);
275  return true;
276  }
277  }
278  return false;
279 }
280 
281 // Returns true if there is no tabstop violation in merging this and other.
282 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const {
283  if (box.left() < other.box.left() && box.left() < other.left_rule_)
284  return false;
285  if (other.box.left() < box.left() && other.box.left() < left_rule_)
286  return false;
287  if (box.right() > other.box.right() && box.right() > other.right_rule_)
288  return false;
289  if (other.box.right() > box.right() && other.box.right() > right_rule_)
290  return false;
291  return true;
292 }
293 
294 // Returns true if other has a similar stroke width to this.
// NOTE(review): the first signature line (listing line 295) is absent from
// this extracted listing; the visible parameters are the two tolerances, and
// the body also reads a BLOBNBOX named `other`.
// Each tolerance is width * fractional_tolerance + constant_tolerance, using
// this blob's width (not other's) for the respective metric.
296  double fractional_tolerance,
297  double constant_tolerance) const {
298  // The perimeter-based width is used as a backup in case there is
299  // no information in the blob.
300  double p_width = area_stroke_width();
301  double n_p_width = other.area_stroke_width();
302  float h_tolerance = horz_stroke_width_ * fractional_tolerance
303  + constant_tolerance;
304  float v_tolerance = vert_stroke_width_ * fractional_tolerance
305  + constant_tolerance;
306  double p_tolerance = p_width * fractional_tolerance
307  + constant_tolerance;
// A direction counts as "zero" when either blob has no width for it.
308  bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
309  bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
310  bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
311  other.horz_stroke_width_, h_tolerance);
312  bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
313  other.vert_stroke_width_, v_tolerance);
314  bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
315  // For a match, at least one of the horizontal and vertical widths
316  // must match, and the other one must either match or be zero.
317  // Only if both are zero will we look at the perimeter metric.
318  return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
319 }
320 
321 // Returns a bounding box of the outline contained within the
322 // given horizontal range.
323 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
324  FCOORD no_rotation(1.0f, 0.0f);
325  float top = box.top();
326  float bottom = box.bottom();
327  if (cblob_ptr != NULL) {
328  find_cblob_limits(cblob_ptr, static_cast<float>(left),
329  static_cast<float>(right), no_rotation,
330  bottom, top);
331  }
332 
333  if (top < bottom) {
334  top = box.top();
335  bottom = box.bottom();
336  }
337  FCOORD bot_left(left, bottom);
338  FCOORD top_right(right, top);
339  TBOX shrunken_box(bot_left);
340  TBOX shrunken_box2(top_right);
341  shrunken_box += shrunken_box2;
342  return shrunken_box;
343 }
344 
345 // Helper to call CleanNeighbours on all blobs on the list.
346 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
347  BLOBNBOX_IT blob_it(blobs);
348  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
349  blob_it.data()->CleanNeighbours();
350  }
351 }
352 
353 // Helper to delete all the deletable blobs on the list.
354 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
355  BLOBNBOX_IT blob_it(blobs);
356  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
357  BLOBNBOX* blob = blob_it.data();
358  if (blob->DeletableNoise()) {
359  delete blob->cblob();
360  delete blob_it.extract();
361  }
362  }
363 }
364 
365 #ifndef GRAPHICS_DISABLED
366 // Helper to draw all the blobs on the list in the given body_colour,
367 // with child outlines in the child_colour.
368 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
369  ScrollView::Color body_colour,
370  ScrollView::Color child_colour,
371  ScrollView* win) {
372  BLOBNBOX_IT it(list);
373  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
374  it.data()->plot(win, body_colour, child_colour);
375  }
376 }
377 
378 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
379 // given list in the given body_colour, with child outlines in the
380 // child_colour.
381 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
382  ScrollView::Color body_colour,
383  ScrollView::Color child_colour,
384  ScrollView* win) {
385  BLOBNBOX_IT it(list);
386  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
387  BLOBNBOX* blob = it.data();
388  if (blob->DeletableNoise())
389  blob->plot(win, body_colour, child_colour);
390  }
391 }
392 
// NOTE(review): the first signature line (listing line 393) and the return
// statement of the BTFT_CHAIN test under BRT_TEXT (listing line 418) are
// absent from this extracted listing; as shown, that `if` has lost its body.
// Maps a (region_type, flow_type) pair to a display colour. Region types not
// listed fall through to GREY.
394  BlobTextFlowType flow_type) {
395  switch (region_type) {
396  case BRT_HLINE:
397  return ScrollView::BROWN;
398  case BRT_VLINE:
399  return ScrollView::DARK_GREEN;
400  case BRT_RECTIMAGE:
401  return ScrollView::RED;
402  case BRT_POLYIMAGE:
403  return ScrollView::ORANGE;
404  case BRT_UNKNOWN:
405  return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
406  case BRT_VERT_TEXT:
407  if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
408  return ScrollView::GREEN;
409  if (flow_type == BTFT_CHAIN)
410  return ScrollView::LIME_GREEN;
411  return ScrollView::YELLOW;
412  case BRT_TEXT:
413  if (flow_type == BTFT_STRONG_CHAIN)
414  return ScrollView::BLUE;
415  if (flow_type == BTFT_TEXT_ON_IMAGE)
416  return ScrollView::LIGHT_BLUE;
417  if (flow_type == BTFT_CHAIN)
419  if (flow_type == BTFT_LEADER)
420  return ScrollView::WHEAT;
421  if (flow_type == BTFT_NONTEXT)
422  return ScrollView::PINK;
423  return ScrollView::MAGENTA;
424  default:
425  return ScrollView::GREY;
426  }
427 }
428 
429 // Keep in sync with BlobRegionType.
// NOTE(review): the signature line (listing line 430) is absent from this
// extracted listing. The body delegates to TextlineColor() with this blob's
// own region type and flow.
431  return TextlineColor(region_type_, flow_);
432 }
433 
434 void BLOBNBOX::plot(ScrollView* window, // window to draw in
435  ScrollView::Color blob_colour, // for outer bits
436  ScrollView::Color child_colour) { // for holes
437  if (cblob_ptr != NULL)
438  cblob_ptr->plot(window, blob_colour, child_colour);
439 }
440 #endif
441 /**********************************************************************
442  * find_cblob_limits
443  *
444  * Scan the outlines of the cblob to locate the y min and max
445  * between the given x limits.
446  **********************************************************************/
447 
448 void find_cblob_limits( //get y limits
449  C_BLOB *blob, //blob to search
450  float leftx, //x limits
451  float rightx,
452  FCOORD rotation, //for landscape
453  float &ymin, //output y limits
454  float &ymax) {
455  inT16 stepindex; //current point
456  ICOORD pos; //current coords
457  ICOORD vec; //rotated step
458  C_OUTLINE *outline; //current outline
459  //outlines
460  C_OUTLINE_IT out_it = blob->out_list ();
461 
462  ymin = (float) MAX_INT32;
463  ymax = (float) -MAX_INT32;
464  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
465  outline = out_it.data ();
466  pos = outline->start_pos (); //get coords
467  pos.rotate (rotation);
468  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
469  //inside
470  if (pos.x () >= leftx && pos.x () <= rightx) {
471  UpdateRange(pos.y(), &ymin, &ymax);
472  }
473  vec = outline->step (stepindex);
474  vec.rotate (rotation);
475  pos += vec; //move to next
476  }
477  }
478 }
479 
480 
481 /**********************************************************************
482  * find_cblob_vlimits
483  *
484  * Scan the outlines of the cblob to locate the y min and max
485  * between the given x limits.
486  **********************************************************************/
487 
488 void find_cblob_vlimits( //get y limits
489  C_BLOB *blob, //blob to search
490  float leftx, //x limits
491  float rightx,
492  float &ymin, //output y limits
493  float &ymax) {
494  inT16 stepindex; //current point
495  ICOORD pos; //current coords
496  ICOORD vec; //rotated step
497  C_OUTLINE *outline; //current outline
498  //outlines
499  C_OUTLINE_IT out_it = blob->out_list ();
500 
501  ymin = (float) MAX_INT32;
502  ymax = (float) -MAX_INT32;
503  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
504  outline = out_it.data ();
505  pos = outline->start_pos (); //get coords
506  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
507  //inside
508  if (pos.x () >= leftx && pos.x () <= rightx) {
509  UpdateRange(pos.y(), &ymin, &ymax);
510  }
511  vec = outline->step (stepindex);
512  pos += vec; //move to next
513  }
514  }
515 }
516 
517 
518 /**********************************************************************
519  * find_cblob_hlimits
520  *
521  * Scan the outlines of the cblob to locate the x min and max
522  * between the given y limits.
523  **********************************************************************/
524 
525 void find_cblob_hlimits( //get x limits
526  C_BLOB *blob, //blob to search
527  float bottomy, //y limits
528  float topy,
529  float &xmin, //output x limits
530  float &xmax) {
531  inT16 stepindex; //current point
532  ICOORD pos; //current coords
533  ICOORD vec; //rotated step
534  C_OUTLINE *outline; //current outline
535  //outlines
536  C_OUTLINE_IT out_it = blob->out_list ();
537 
538  xmin = (float) MAX_INT32;
539  xmax = (float) -MAX_INT32;
540  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
541  outline = out_it.data ();
542  pos = outline->start_pos (); //get coords
543  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
544  //inside
545  if (pos.y () >= bottomy && pos.y () <= topy) {
546  UpdateRange(pos.x(), &xmin, &xmax);
547  }
548  vec = outline->step (stepindex);
549  pos += vec; //move to next
550  }
551  }
552 }
553 
554 /**********************************************************************
555  * crotate_cblob
556  *
557  * Rotate the copy by the given vector and return a C_BLOB.
558  **********************************************************************/
559 
560 C_BLOB *crotate_cblob( //rotate it
561  C_BLOB *blob, //blob to search
562  FCOORD rotation //for landscape
563  ) {
564  C_OUTLINE_LIST out_list; //output outlines
565  //input outlines
566  C_OUTLINE_IT in_it = blob->out_list ();
567  //output outlines
568  C_OUTLINE_IT out_it = &out_list;
569 
570  for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
571  out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
572  }
573  return new C_BLOB (&out_list);
574 }
575 
576 
577 /**********************************************************************
578  * box_next
579  *
580  * Compute the bounding box of this blob with merging of x overlaps
581  * but no pre-chopping.
582  * Then move the iterator on to the start of the next blob.
583  **********************************************************************/
584 
585 TBOX box_next( //get bounding box
586  BLOBNBOX_IT *it //iterator to blobds
587  ) {
588  BLOBNBOX *blob; //current blob
589  TBOX result; //total box
590 
591  blob = it->data ();
592  result = blob->bounding_box ();
593  do {
594  it->forward ();
595  blob = it->data ();
596  if (blob->cblob() == NULL)
597  //was pre-chopped
598  result += blob->bounding_box ();
599  }
600  //until next real blob
601  while ((blob->cblob() == NULL) || blob->joined_to_prev());
602  return result;
603 }
604 
605 
606 /**********************************************************************
607  * box_next_pre_chopped
608  *
609  * Compute the bounding box of this blob with merging of x overlaps
610  * but WITH pre-chopping.
611  * Then move the iterator on to the start of the next pre-chopped blob.
612  **********************************************************************/
613 
614 TBOX box_next_pre_chopped( //get bounding box
615  BLOBNBOX_IT *it //iterator to blobds
616  ) {
617  BLOBNBOX *blob; //current blob
618  TBOX result; //total box
619 
620  blob = it->data ();
621  result = blob->bounding_box ();
622  do {
623  it->forward ();
624  blob = it->data ();
625  }
626  //until next real blob
627  while (blob->joined_to_prev ());
628  return result;
629 }
630 
631 
632 /**********************************************************************
633  * TO_ROW::TO_ROW
634  *
635  * Constructor to make a row from a blob.
636  **********************************************************************/
637 
638 TO_ROW::TO_ROW ( //constructor
639 BLOBNBOX * blob, //first blob
640 float top, //corrected top
641 float bottom, //of row
642 float row_size //ideal
643 ) {
644  clear();
645  y_min = bottom;
646  y_max = top;
647  initial_y_min = bottom;
648 
649  float diff; //in size
650  BLOBNBOX_IT it = &blobs; //list of blobs
651 
652  it.add_to_end (blob);
653  diff = top - bottom - row_size;
654  if (diff > 0) {
655  y_max -= diff / 2;
656  y_min += diff / 2;
657  }
658  //very small object
659  else if ((top - bottom) * 3 < row_size) {
660  diff = row_size / 3 + bottom - top;
661  y_max += diff / 2;
662  y_min -= diff / 2;
663  }
664 }
665 
666 
667 /**********************************************************************
668  * TO_ROW:add_blob
669  *
670  * Add the blob to the end of the row.
671  **********************************************************************/
672 
673 void TO_ROW::add_blob( //constructor
674  BLOBNBOX *blob, //first blob
675  float top, //corrected top
676  float bottom, //of row
677  float row_size //ideal
678  ) {
679  float allowed; //allowed expansion
680  float available; //expansion
681  BLOBNBOX_IT it = &blobs; //list of blobs
682 
683  it.add_to_end (blob);
684  allowed = row_size + y_min - y_max;
685  if (allowed > 0) {
686  available = top > y_max ? top - y_max : 0;
687  if (bottom < y_min)
688  //total available
689  available += y_min - bottom;
690  if (available > 0) {
691  available += available; //do it gradually
692  if (available < allowed)
693  available = allowed;
694  if (bottom < y_min)
695  y_min -= (y_min - bottom) * allowed / available;
696  if (top > y_max)
697  y_max += (top - y_max) * allowed / available;
698  }
699  }
700 }
701 
702 
703 /**********************************************************************
704  * TO_ROW:insert_blob
705  *
706  * Add the blob to the row in the correct position.
707  **********************************************************************/
708 
709 void TO_ROW::insert_blob( //constructor
710  BLOBNBOX *blob //first blob
711  ) {
712  BLOBNBOX_IT it = &blobs; //list of blobs
713 
714  if (it.empty ())
715  it.add_before_then_move (blob);
716  else {
717  it.mark_cycle_pt ();
718  while (!it.cycled_list ()
719  && it.data ()->bounding_box ().left () <=
720  blob->bounding_box ().left ())
721  it.forward ();
722  if (it.cycled_list ())
723  it.add_to_end (blob);
724  else
725  it.add_before_stay_put (blob);
726  }
727 }
728 
729 
730 /**********************************************************************
731  * TO_ROW::compute_vertical_projection
732  *
733  * Compute the vertical projection of a TO_ROW from its blobs.
734  **********************************************************************/
735 
// NOTE(review): listing lines 748, 751 and 755 are absent from this extracted
// listing, so one statement is cut in half and the body of the final `if`
// is not visible. Only what remains is described below.
736 void TO_ROW::compute_vertical_projection() { //project whole row
737  TBOX row_box; //bound of row
738  BLOBNBOX *blob; //current blob
739  TBOX blob_box; //bounding box
740  BLOBNBOX_IT blob_it = blob_list ();
741 
// An empty row has nothing to project.
742  if (blob_it.empty ())
743  return;
// Union of all blob boxes in the row.
744  row_box = blob_it.data ()->bounding_box ();
745  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
746  row_box += blob_it.data ()->bounding_box ();
747 
// The row extent is widened by PROJECTION_MARGIN; the start of the call that
// takes the right-hand limit is on a missing line.
749  row_box.right () + PROJECTION_MARGIN);
750  projection_left = row_box.left () - PROJECTION_MARGIN;
// Second pass over the blobs; only those with a cblob are acted on.
752  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
753  blob = blob_it.data();
754  if (blob->cblob() != NULL)
756  }
757 }
758 
759 
760 /**********************************************************************
761  * TO_ROW::clear
762  *
763  * Zero out all scalar members.
764  **********************************************************************/
// Resets the row's scalar members. Everything is set to zero except
// num_repeated_sets_, which is set to -1.
// NOTE(review): listing line 770 is absent from this extracted listing, so
// one statement between projection_right and fixed_pitch is not visible.
765 void TO_ROW::clear() {
766  all_caps = 0;
767  used_dm_model = 0;
768  projection_left = 0;
769  projection_right = 0;
771  fixed_pitch = 0.0;
772  fp_space = 0.0;
773  fp_nonsp = 0.0;
774  pr_space = 0.0;
775  pr_nonsp = 0.0;
776  spacing = 0.0;
777  xheight = 0.0;
778  xheight_evidence = 0;
779  body_size = 0.0;
780  ascrise = 0.0;
781  descdrop = 0.0;
782  min_space = 0;
783  max_nonspace = 0;
784  space_threshold = 0;
785  kern_size = 0.0;
786  space_size = 0.0;
787  y_min = 0.0;
788  y_max = 0.0;
789  initial_y_min = 0.0;
790  m = 0.0;
791  c = 0.0;
792  error = 0.0;
793  para_c = 0.0;
794  para_error = 0.0;
795  y_origin = 0.0;
796  credibility = 0.0;
797  num_repeated_sets_ = -1;
798 }
799 
800 
801 /**********************************************************************
802  * vertical_cblob_projection
803  *
804  * Compute the vertical projection of a cblob from its outlines
805  * and add to the given STATS.
806  **********************************************************************/
807 
808 void vertical_cblob_projection( //project outlines
809  C_BLOB *blob, //blob to project
810  STATS *stats //output
811  ) {
812  //outlines of blob
813  C_OUTLINE_IT out_it = blob->out_list ();
814 
815  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
816  vertical_coutline_projection (out_it.data (), stats);
817  }
818 }
819 
820 
821 /**********************************************************************
822  * vertical_coutline_projection
823  *
824  * Compute the vertical projection of a outline from its outlines
825  * and add to the given STATS.
826  **********************************************************************/
827 
828 void vertical_coutline_projection( //project outlines
829  C_OUTLINE *outline, //outline to project
830  STATS *stats //output
831  ) {
832  ICOORD pos; //current point
833  ICOORD step; //edge step
834  inT32 length; //of outline
835  inT16 stepindex; //current step
836  C_OUTLINE_IT out_it = outline->child ();
837 
838  pos = outline->start_pos ();
839  length = outline->pathlength ();
840  for (stepindex = 0; stepindex < length; stepindex++) {
841  step = outline->step (stepindex);
842  if (step.x () > 0) {
843  stats->add (pos.x (), -pos.y ());
844  } else if (step.x () < 0) {
845  stats->add (pos.x () - 1, pos.y ());
846  }
847  pos += step;
848  }
849 
850  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
851  vertical_coutline_projection (out_it.data (), stats);
852  }
853 }
854 
855 
856 /**********************************************************************
857  * TO_BLOCK::TO_BLOCK
858  *
859  * Constructor to make a TO_BLOCK from a real block.
860  **********************************************************************/
861 
862 TO_BLOCK::TO_BLOCK( //make a block
863  BLOCK *src_block //real block
864  ) {
865  clear();
866  block = src_block;
867 }
868 
869 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
870  BLOBNBOX_IT it = boxes;
871  // A BLOBNBOX generally doesn't own its blobs, so if they do, you
872  // have to delete them explicitly.
873  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
874  BLOBNBOX* box = it.data();
875  if (box->cblob() != NULL)
876  delete box->cblob();
877  }
878 }
879 
880 /**********************************************************************
881  * TO_BLOCK::clear
882  *
883  * Zero out all scalar members.
884  **********************************************************************/
// NOTE(review): the signature line (listing line 885) and one statement
// (listing line 887) are absent from this extracted listing.
// Resets the visible members: both pointers to NULL, every numeric to zero.
886  block = NULL;
888  line_spacing = 0.0;
889  line_size = 0.0;
890  max_blob_size = 0.0;
891  baseline_offset = 0.0;
892  xheight = 0.0;
893  fixed_pitch = 0.0;
894  kern_size = 0.0;
895  space_size = 0.0;
896  min_space = 0;
897  max_nonspace = 0;
898  fp_space = 0.0;
899  fp_nonsp = 0.0;
900  pr_space = 0.0;
901  pr_nonsp = 0.0;
902  key_row = NULL;
903 }
904 
905 
// NOTE(review): the signature line (listing line 906) is absent from this
// extracted listing; the body runs clear_blobnboxes() over each of the five
// blob lists.
907  // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
908  clear_blobnboxes(&blobs);
909  clear_blobnboxes(&underlines);
910  clear_blobnboxes(&noise_blobs);
911  clear_blobnboxes(&small_blobs);
912  clear_blobnboxes(&large_blobs);
913 }
914 
915 // Helper function to divide the input blobs over noise, small, medium
916 // and large lists. Blobs small in height and (small in width or large in width)
917 // go in the noise list. Dash (-) candidates go in the small list, and
918 // medium and large are by height.
919 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
920 static void SizeFilterBlobs(int min_height, int max_height,
921  BLOBNBOX_LIST* src_list,
922  BLOBNBOX_LIST* noise_list,
923  BLOBNBOX_LIST* small_list,
924  BLOBNBOX_LIST* medium_list,
925  BLOBNBOX_LIST* large_list) {
926  BLOBNBOX_IT noise_it(noise_list);
927  BLOBNBOX_IT small_it(small_list);
928  BLOBNBOX_IT medium_it(medium_list);
929  BLOBNBOX_IT large_it(large_list);
930  for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
931  BLOBNBOX* blob = src_it.extract();
932  blob->ReInit();
933  int width = blob->bounding_box().width();
934  int height = blob->bounding_box().height();
935  if (height < min_height &&
936  (width < min_height || width > max_height))
937  noise_it.add_after_then_move(blob);
938  else if (height > max_height)
939  large_it.add_after_then_move(blob);
940  else if (height < min_height)
941  small_it.add_after_then_move(blob);
942  else
943  medium_it.add_after_then_move(blob);
944  }
945 }
946 
947 // Reorganize the blob lists with a different definition of small, medium
948 // and large, compared to the original definition.
949 // Height is still the primary filter key, but medium width blobs of small
950 // height become small, and very wide blobs of small height stay noise, along
951 // with small dot-shaped blobs.
// NOTE(review): the signature line (listing line 952) is absent from this
// extracted listing; only the body is visible.
// The height thresholds are kMinMediumSizeRatio and kMaxMediumSizeRatio times
// line_size, rounded to int.
953  int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
954  int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
955  BLOBNBOX_LIST noise_list;
956  BLOBNBOX_LIST small_list;
957  BLOBNBOX_LIST medium_list;
958  BLOBNBOX_LIST large_list;
// Drain all four member lists into the temporary lists by size...
959  SizeFilterBlobs(min_height, max_height, &blobs,
960  &noise_list, &small_list, &medium_list, &large_list);
961  SizeFilterBlobs(min_height, max_height, &large_blobs,
962  &noise_list, &small_list, &medium_list, &large_list);
963  SizeFilterBlobs(min_height, max_height, &small_blobs,
964  &noise_list, &small_list, &medium_list, &large_list);
965  SizeFilterBlobs(min_height, max_height, &noise_blobs,
966  &noise_list, &small_list, &medium_list, &large_list);
// ...then move each temporary list back into the matching member list
// (medium blobs go to `blobs`).
967  BLOBNBOX_IT blob_it(&blobs);
968  blob_it.add_list_after(&medium_list);
969  blob_it.set_to_list(&large_blobs);
970  blob_it.add_list_after(&large_list);
971  blob_it.set_to_list(&small_blobs);
972  blob_it.add_list_after(&small_list);
973  blob_it.set_to_list(&noise_blobs);
974  blob_it.add_list_after(&noise_list);
975 }
976 
977 // Deletes noise blobs from all lists where not owned by a ColPartition.
987 }
988 
989 #ifndef GRAPHICS_DISABLED
990 // Draw the noise blobs from all lists in red.
996 }
997 
998 // Draw the blobs on the various lists in the block in different colors.
1002  win);
1004  win);
1006 }
1007 
1008 /**********************************************************************
1009  * plot_blob_list
1010  *
1011  * Draw a list of blobs.
1012  **********************************************************************/
1013 
1014 void plot_blob_list(ScrollView* win, // window to draw in
1015  BLOBNBOX_LIST *list, // blob list
1016  ScrollView::Color body_colour, // colour to draw
1017  ScrollView::Color child_colour) { // colour of child
1018  BLOBNBOX_IT it = list;
1019  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1020  it.data()->plot(win, body_colour, child_colour);
1021  }
1022 }
1023 #endif // GRAPHICS_DISABLED