Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
edgblob.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: edgblob.c (Formerly edgeloop.c)
3  * Description: Functions to clean up an outline before approximation.
4  * Author: Ray Smith
5  * Created: Tue Mar 26 16:56:25 GMT 1991
6  *
7  *(C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0(the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include "scanedg.h"
22 #include "drawedg.h"
23 #include "edgloop.h"
24 #include "edgblob.h"
25 
26 // Include automatically generated configuration file if running autoconf.
27 #ifdef HAVE_CONFIG_H
28 #include "config_auto.h"
29 #endif
30 
31 #define EXTERN
32 
33 // Control parameters used in outline_complexity(), which rejects an outline
34 // if any one of the 3 conditions is satisfied:
35 // - number of children exceeds edges_max_children_per_outline
36 // - number of nested layers exceeds edges_max_children_layers
37 // - joint complexity exceeds edges_children_count_limit(as in child_count())
39  "Use the new outline complexity module");
41  "Max number of children inside a character outline");
43  "Max layers of nested children inside a character outline");
45  "turn on debugging for this module");
46 
47 
49  "Importance ratio for chucking outlines");
51  "Max holes allowed in blob");
53  "Remove boxy parents of char-like children");
55  "Min pixels for potential char in box");
57  "Max lensq/area for acceptable child outline");
59  "Min area fraction of child outline");
61  "Min area fraction of grandchild for box");
62 
70 ICOORD bleft, // corners
71 ICOORD tright): bl(bleft), tr(tright) {
72  bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1;
73  bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1;
74  // make array
75  buckets = new C_OUTLINE_LIST[bxdim * bydim];
76  index = 0;
77 }
78 
79 
87 C_OUTLINE_LIST *
88 OL_BUCKETS::operator()( // array access
89 inT16 x, // image coords
90 inT16 y) {
91  return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE];
92 }
93 
94 
116  C_OUTLINE *outline, // parent outline
117  inT32 max_count, // max output
118  inT16 depth // recurion depth
119  ) {
120  inT16 xmin, xmax; // coord limits
121  inT16 ymin, ymax;
122  inT16 xindex, yindex; // current bucket
123  C_OUTLINE *child; // current child
124  inT32 child_count; // no of children
125  inT32 grandchild_count; // no of grandchildren
126  C_OUTLINE_IT child_it; // search iterator
127 
128  TBOX olbox = outline->bounding_box();
129  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
130  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
131  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
132  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
133  child_count = 0;
134  grandchild_count = 0;
135  if (++depth > edges_max_children_layers) // nested loops are too deep
136  return max_count + depth;
137 
138  for (yindex = ymin; yindex <= ymax; yindex++) {
139  for (xindex = xmin; xindex <= xmax; xindex++) {
140  child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
141  if (child_it.empty())
142  continue;
143  for (child_it.mark_cycle_pt(); !child_it.cycled_list();
144  child_it.forward()) {
145  child = child_it.data();
146  if (child == outline || !(*child < *outline))
147  continue;
148  child_count++;
149 
150  if (child_count > edges_max_children_per_outline) { // too fragmented
151  if (edges_debug)
152  tprintf("Discard outline on child_count=%d > "
153  "max_children_per_outline=%d\n",
154  child_count,
155  static_cast<inT32>(edges_max_children_per_outline));
156  return max_count + child_count;
157  }
158 
159  // Compute the "complexity" of each child recursively
160  inT32 remaining_count = max_count - child_count - grandchild_count;
161  if (remaining_count > 0)
162  grandchild_count += edges_children_per_grandchild *
163  outline_complexity(child, remaining_count, depth);
164  if (child_count + grandchild_count > max_count) { // too complex
165  if (edges_debug)
166  tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
167  "> max_count=%d\n",
168  child_count, grandchild_count, max_count);
169  return child_count + grandchild_count;
170  }
171  }
172  }
173  }
174  return child_count + grandchild_count;
175 }
176 
177 
185  C_OUTLINE *outline, // parent outline
186  inT32 max_count // max output
187  ) {
188  BOOL8 parent_box; // could it be boxy
189  inT16 xmin, xmax; // coord limits
190  inT16 ymin, ymax;
191  inT16 xindex, yindex; // current bucket
192  C_OUTLINE *child; // current child
193  inT32 child_count; // no of children
194  inT32 grandchild_count; // no of grandchildren
195  inT32 parent_area; // potential box
196  FLOAT32 max_parent_area; // potential box
197  inT32 child_area; // current child
198  inT32 child_length; // current child
199  TBOX olbox;
200  C_OUTLINE_IT child_it; // search iterator
201 
202  olbox = outline->bounding_box();
203  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
204  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
205  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
206  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
207  child_count = 0;
208  grandchild_count = 0;
209  parent_area = 0;
210  max_parent_area = 0;
211  parent_box = TRUE;
212  for (yindex = ymin; yindex <= ymax; yindex++) {
213  for (xindex = xmin; xindex <= xmax; xindex++) {
214  child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
215  if (child_it.empty())
216  continue;
217  for (child_it.mark_cycle_pt(); !child_it.cycled_list();
218  child_it.forward()) {
219  child = child_it.data();
220  if (child != outline && *child < *outline) {
221  child_count++;
222  if (child_count <= max_count) {
223  int max_grand =(max_count - child_count) /
225  if (max_grand > 0)
226  grandchild_count += count_children(child, max_grand) *
228  else
229  grandchild_count += count_children(child, 1);
230  }
231  if (child_count + grandchild_count > max_count) {
232  if (edges_debug)
233  tprintf("Discarding parent with child count=%d, gc=%d\n",
234  child_count,grandchild_count);
235  return child_count + grandchild_count;
236  }
237  if (parent_area == 0) {
238  parent_area = outline->outer_area();
239  if (parent_area < 0)
240  parent_area = -parent_area;
241  max_parent_area = outline->bounding_box().area() * edges_boxarea;
242  if (parent_area < max_parent_area)
243  parent_box = FALSE;
244  }
245  if (parent_box &&
246  (!edges_children_fix ||
247  child->bounding_box().height() > edges_min_nonhole)) {
248  child_area = child->outer_area();
249  if (child_area < 0)
250  child_area = -child_area;
251  if (edges_children_fix) {
252  if (parent_area - child_area < max_parent_area) {
253  parent_box = FALSE;
254  continue;
255  }
256  if (grandchild_count > 0) {
257  if (edges_debug)
258  tprintf("Discarding parent of area %d, child area=%d, max%g "
259  "with gc=%d\n",
260  parent_area, child_area, max_parent_area,
261  grandchild_count);
262  return max_count + 1;
263  }
264  child_length = child->pathlength();
265  if (child_length * child_length >
266  child_area * edges_patharea_ratio) {
267  if (edges_debug)
268  tprintf("Discarding parent of area %d, child area=%d, max%g "
269  "with child length=%d\n",
270  parent_area, child_area, max_parent_area,
271  child_length);
272  return max_count + 1;
273  }
274  }
275  if (child_area < child->bounding_box().area() * edges_childarea) {
276  if (edges_debug)
277  tprintf("Discarding parent of area %d, child area=%d, max%g "
278  "with child rect=%d\n",
279  parent_area, child_area, max_parent_area,
280  child->bounding_box().area());
281  return max_count + 1;
282  }
283  }
284  }
285  }
286  }
287  }
288  return child_count + grandchild_count;
289 }
290 
291 
292 
293 
300 void OL_BUCKETS::extract_children( // recursive count
301  C_OUTLINE *outline, // parent outline
302  C_OUTLINE_IT *it // destination iterator
303  ) {
304  inT16 xmin, xmax; // coord limits
305  inT16 ymin, ymax;
306  inT16 xindex, yindex; // current bucket
307  TBOX olbox;
308  C_OUTLINE_IT child_it; // search iterator
309 
310  olbox = outline->bounding_box();
311  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
312  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
313  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
314  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
315  for (yindex = ymin; yindex <= ymax; yindex++) {
316  for (xindex = xmin; xindex <= xmax; xindex++) {
317  child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
318  for (child_it.mark_cycle_pt(); !child_it.cycled_list();
319  child_it.forward()) {
320  if (*child_it.data() < *outline) {
321  it->add_after_then_move(child_it.extract());
322  }
323  }
324  }
325  }
326 }
327 
328 
335 void extract_edges(Pix* pix, // thresholded image
336  BLOCK *block) { // block to scan
337  C_OUTLINE_LIST outlines; // outlines in block
338  C_OUTLINE_IT out_it = &outlines;
339 
340  // TODO(rays) move the pix all the way down to the bottom.
341  IMAGE image;
342  image.FromPix(pix);
343 
344  block_edges(&image, block, &out_it);
345  ICOORD bleft; // block box
346  ICOORD tright;
347  block->bounding_box(bleft, tright);
348  // make blobs
349  outlines_to_blobs(block, bleft, tright, &outlines);
350 }
351 
352 
359 void outlines_to_blobs( // find blobs
360  BLOCK *block, // block to scan
361  ICOORD bleft,
362  ICOORD tright,
363  C_OUTLINE_LIST *outlines) {
364  // make buckets
365  OL_BUCKETS buckets(bleft, tright);
366 
367  fill_buckets(outlines, &buckets);
368  empty_buckets(block, &buckets);
369 }
370 
371 
378 void fill_buckets( // find blobs
379  C_OUTLINE_LIST *outlines, // outlines in block
380  OL_BUCKETS *buckets // output buckets
381  ) {
382  TBOX ol_box; // outline box
383  C_OUTLINE_IT out_it = outlines; // iterator
384  C_OUTLINE_IT bucket_it; // iterator in bucket
385  C_OUTLINE *outline; // current outline
386 
387  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
388  outline = out_it.extract(); // take off list
389  // get box
390  ol_box = outline->bounding_box();
391  bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
392  bucket_it.add_to_end(outline);
393  }
394 }
395 
396 
403 void empty_buckets( // find blobs
404  BLOCK *block, // block to scan
405  OL_BUCKETS *buckets // output buckets
406  ) {
407  BOOL8 good_blob; // healthy blob
408  C_OUTLINE_LIST outlines; // outlines in block
409  // iterator
410  C_OUTLINE_IT out_it = &outlines;
411  C_OUTLINE_IT bucket_it = buckets->start_scan();
412  C_OUTLINE_IT parent_it; // parent outline
413  C_BLOB *blob; // new blob
414  C_BLOB_IT good_blobs = block->blob_list();
415  C_BLOB_IT junk_blobs = block->reject_blobs();
416 
417  while (!bucket_it.empty()) {
418  out_it.set_to_list(&outlines);
419  do {
420  parent_it = bucket_it; // find outermost
421  do {
422  bucket_it.forward();
423  } while (!bucket_it.at_first() &&
424  !(*parent_it.data() < *bucket_it.data()));
425  } while (!bucket_it.at_first());
426 
427  // move to new list
428  out_it.add_after_then_move(parent_it.extract());
429  good_blob = capture_children(buckets, &junk_blobs, &out_it);
430  blob = new C_BLOB(&outlines);
431  if (good_blob)
432  good_blobs.add_after_then_move(blob);
433  else
434  junk_blobs.add_after_then_move(blob);
435 
436  bucket_it.set_to_list(buckets->scan_next());
437  }
438 }
439 
440 
449 BOOL8 capture_children( // find children
450  OL_BUCKETS *buckets, // bucket sort clanss
451  C_BLOB_IT *reject_it, // dead grandchildren
452  C_OUTLINE_IT *blob_it // output outlines
453  ) {
454  C_OUTLINE *outline; // master outline
455  inT32 child_count; // no of children
456 
457  outline = blob_it->data();
459  child_count = buckets->outline_complexity(outline,
461  0);
462  else
463  child_count = buckets->count_children(outline,
465  if (child_count > edges_children_count_limit)
466  return FALSE;
467 
468  if (child_count > 0)
469  buckets->extract_children(outline, blob_it);
470  return TRUE;
471 }