Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
bestfirst.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: bestfirst.c (Formerly bestfirst.c)
5  * Description: Best first search functions
6  * Author: Mark Seaman, OCR Technology
7  * Created: Mon May 14 11:23:29 1990
8  * Modified: Tue Jul 30 16:08:47 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Experimental (Do Not Distribute)
12  *
13  * (c) Copyright 1990, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  ***************************************************************************/
25 
26 /*----------------------------------------------------------------------
27  I n c l u d e s
28 ---------------------------------------------------------------------*/
29 
30 #include <assert.h>
31 
32 #include "associate.h"
33 #include "bestfirst.h"
34 #include "baseline.h"
35 #include "bitvec.h"
36 #include "dict.h"
37 #include "freelist.h"
38 #include "globals.h"
39 #include "helpers.h"
40 #include "pageres.h"
41 #include "permute.h"
42 #include "plotseg.h"
43 #include "ratngs.h"
44 #include "states.h"
45 #include "stopper.h"
46 #include "structures.h"
47 #include "unicharset.h"
48 #include "wordclass.h"
49 #include "wordrec.h"
50 
51 // Include automatically generated configuration file if running autoconf.
52 #ifdef HAVE_CONFIG_H
53 #include "config_auto.h"
54 #endif
55 
56 void call_caller();
57 
58 static void log_state(const char * message,
59  int num_joints,
60  STATE *state) {
61  STRING segstate;
62  print_state(state, num_joints, &segstate);
63  tprintf("%20s [%40s]\n", message, segstate.string());
64 }
65 
66 static void log_state(const char * message,
67  int num_joints,
68  STATE *state,
69  float priority) {
70  STRING segstate;
71  print_state(state, num_joints, &segstate);
72  tprintf("%20s [%40s], priority %8.3f\n", message,
73  segstate.string(), priority);
74 }
75 
76 
77 /*----------------------------------------------------------------------
78  F u n c t i o n s
79 ----------------------------------------------------------------------*/
80 
81 namespace tesseract {
89  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
90  WERD_RES *word,
91  STATE *state,
92  DANGERR *fixpt,
93  STATE *best_state) {
94  SEARCH_RECORD *the_search;
95  inT16 keep_going;
96  STATE guided_state; // not used
97 
98  int num_joints = chunks_record->ratings->dimension() - 1;
99  the_search = new_search(chunks_record, num_joints, best_char_choices,
100  word->best_choice, word->raw_choice, state);
101 
102  // The default state is initialized as the best choice. In order to apply
103  // segmentation adjustment, or any other contextual processing in permute,
104  // we give the best choice a poor rating to force the processed raw choice
105  // to be promoted to best choice.
107  evaluate_state(chunks_record, the_search, fixpt, word->blamer_bundle);
108  if (wordrec_debug_level > 1) {
109  tprintf("\n\n\n =========== BestFirstSearch ==============\n");
110  word->best_choice->print("**Initial BestChoice**");
111  }
112 
113  FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record, the_search);
114  if (worst_priority < wordrec_worst_state)
115  worst_priority = wordrec_worst_state;
116  if (wordrec_debug_level > 1) {
117  log_state("BestFirstSearch", num_joints, best_state);
118  }
119 
120  guided_state = *state;
121  do {
122  /* Look for answer */
123  STATE orig_state = *the_search->this_state;
124  if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
125  guided_state = *(the_search->this_state);
126  keep_going = evaluate_state(chunks_record, the_search, fixpt,
127  word->blamer_bundle);
128  hash_add (the_search->closed_states, the_search->this_state);
129 
130  if (!keep_going ||
131  (the_search->num_states > wordrec_num_seg_states)) {
132  if (wordrec_debug_level > 1)
133  tprintf("Breaking best_first_search on keep_going %s numstates %d\n",
134  ((keep_going) ? "T" :"F"), the_search->num_states);
135  free_state (the_search->this_state);
136  break;
137  }
138 
139  FLOAT32 new_worst_priority = 2.0f * prioritize_state(chunks_record,
140  the_search);
141  if (new_worst_priority < worst_priority) {
142  if (wordrec_debug_level > 1)
143  tprintf("Lowering WorstPriority %f --> %f\n",
144  worst_priority, new_worst_priority);
145  // Tighten the threshold for admitting new paths as better search
146  // candidates are found. After lowering this threshold, we can safely
147  // popout everything that is worse than this score also.
148  worst_priority = new_worst_priority;
149  }
150  expand_node(worst_priority, chunks_record, the_search);
151  }
152 
153  if (wordrec_debug_level > 1) {
154  log_state("Done with", the_search->num_joints, &orig_state);
155  }
156  free_state (the_search->this_state);
157  num_popped++;
158  the_search->this_state = pop_queue (the_search->open_states);
159  if (wordrec_debug_level > 1 && !the_search->this_state)
160  tprintf("No more states to evalaute after %d evals", num_popped);
161  } while (the_search->this_state);
162 
163  state->part1 = the_search->best_state->part1;
164  state->part2 = the_search->best_state->part2;
165  if (wordrec_debug_level > 1) {
166  tprintf("\n\n\n =========== BestFirstSearch ==============\n");
167  // best_choice->debug_string().string());
168  word->best_choice->print("**Final BestChoice**");
169  }
170  // save the best_state stats
171  delete_search(the_search);
172 }
173 
180  float closeness;
181 
182  closeness = (the_search->num_joints ?
183  (hamming_distance(reinterpret_cast<uinT32*>(the_search->first_state),
184  reinterpret_cast<uinT32*>(the_search->best_state), 2) /
185  (float) the_search->num_joints) : 0.0f);
186 
187  free_state (the_search->first_state);
188  free_state (the_search->best_state);
189 
190  free_hash_table(the_search->closed_states);
191  FreeHeapData (the_search->open_states, (void_dest) free_state);
192 
193  memfree(the_search);
194 }
195 
196 
204  SEARCH_STATE search_state,
205  BlamerBundle *blamer_bundle) {
206  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
207  BLOB_CHOICE_LIST *blob_choices;
208  BLOB_CHOICE_IT blob_choice_it;
209  int i;
210  int x = 0;
211  int y;
212 
213  // Iterate sub-paths.
214  for (i = 1; i <= search_state[0] + 1; i++) {
215  if (i > search_state[0])
216  y = count_blobs (chunks_record->chunks) - 1;
217  else
218  y = x + search_state[i];
219 
220  // Process one square.
221 
222  // Classify if needed.
223  blob_choices = get_piece_rating(chunks_record->ratings,
224  chunks_record->chunks,
225  chunks_record->word_res->denorm,
226  chunks_record->splits,
227  x, y, blamer_bundle);
228 
229  if (blob_choices == NULL) {
230  delete char_choices;
231  return (NULL);
232  }
233 
234  // Add permuted ratings.
235  blob_choice_it.set_to_list(blob_choices);
236  last_segmentation[i - 1].certainty = blob_choice_it.data()->certainty();
237  last_segmentation[i - 1].match = blob_choice_it.data()->rating();
238 
239  last_segmentation[i - 1].width =
240  AssociateUtils::GetChunksWidth(chunks_record->chunk_widths, x, y);
241  last_segmentation[i - 1].gap =
242  AssociateUtils::GetChunksGap(chunks_record->chunk_widths, y);
243 
244  *char_choices += blob_choices;
245  x = y + 1;
246  }
247  return (char_choices);
248 }
249 
257  SEARCH_RECORD *the_search,
258  DANGERR *fixpt,
259  BlamerBundle *blamer_bundle) {
260  BLOB_CHOICE_LIST_VECTOR *char_choices;
261  SEARCH_STATE chunk_groups;
262  float rating_limit = the_search->best_choice->rating();
263  bool keep_going = true;
264  PIECES_STATE widths;
265 
266  the_search->num_states++;
267  chunk_groups = bin_to_chunks(the_search->this_state,
268  the_search->num_joints);
269  bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
270  if (wordrec_debug_level > 1) {
271  log_state("Evaluating state", the_search->num_joints,
272  the_search->this_state);
273  }
274  getDict().LogNewSegmentation(widths);
275 
276  char_choices = evaluate_chunks(chunks_record, chunk_groups, blamer_bundle);
278  bool updated_best_choice = false;
279  if (char_choices != NULL && char_choices->length() > 0) {
280  // Compute the segmentation cost and include the cost in word rating.
281  // TODO(dsl): We should change the SEARCH_RECORD to store this cost
282  // from state evaluation and avoid recomputing it here.
283  prioritize_state(chunks_record, the_search);
285  updated_best_choice =
286  getDict().permute_characters(*char_choices,
287  the_search->best_choice,
288  the_search->raw_choice);
289  bool replaced = false;
290  if (updated_best_choice) {
291  if (getDict().AcceptableChoice(char_choices, the_search->best_choice,
292  NULL, ASSOCIATOR_CALLER, &replaced)) {
293  keep_going = false;
294  }
295  CopyCharChoices(*char_choices, the_search->best_char_choices);
296  }
297  }
299 
300 #ifndef GRAPHICS_DISABLED
302  display_segmentation (chunks_record->chunks, chunk_groups);
305  }
306 #endif
307 
308  if (rating_limit != the_search->best_choice->rating()) {
309  ASSERT_HOST(updated_best_choice);
310  the_search->before_best = the_search->num_states;
311  the_search->best_state->part1 = the_search->this_state->part1;
312  the_search->best_state->part2 = the_search->this_state->part2;
313  replace_char_widths(chunks_record, chunk_groups);
314  } else {
315  ASSERT_HOST(!updated_best_choice);
316  if (char_choices != NULL) fixpt->clear();
317  }
318 
319  if (char_choices != NULL) delete char_choices;
320  memfree(chunk_groups);
321 
322  return (keep_going);
323 }
324 
325 
333  WERD_RES *word,
334  STATE *state,
335  BLOB_CHOICE_LIST_VECTOR *old_choices,
336  MATRIX *ratings) {
337  // Initialize search_state, num_joints, x, y.
338  int num_joints = array_count(word->seam_array);
339 #ifndef GRAPHICS_DISABLED
341  print_state("Rebuilding state", state, num_joints);
342  }
343 #endif
344  // Setup the rebuild_word ready for the output blobs.
345  if (word->rebuild_word != NULL)
346  delete word->rebuild_word;
347  word->rebuild_word = new TWERD;
348  // Setup the best_state.
349  word->best_state.clear();
350  SEARCH_STATE search_state = bin_to_chunks(state, num_joints);
351  // See which index is which below for information on x and y.
352  int x = 0;
353  int y;
354  for (int i = 1; i <= search_state[0]; i++) {
355  y = x + search_state[i];
356  x = y + 1;
357  }
358  y = count_blobs(word->chopped_word->blobs) - 1;
359 
360  // Initialize char_choices, expanded_fragment_lengths:
361  // e.g. if fragment_lengths = {1 1 2 3 1},
362  // expanded_fragment_lengths_str = {1 1 2 2 3 3 3 1}.
363  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
364  STRING expanded_fragment_lengths_str = "";
365  bool state_has_fragments = false;
366  const char *fragment_lengths = NULL;
367 
368  if (word->best_choice->length() > 0) {
369  fragment_lengths = word->best_choice->fragment_lengths();
370  }
371  if (fragment_lengths) {
372  for (int i = 0; i < word->best_choice->length(); ++i) {
373  *char_choices += NULL;
374  word->best_state.push_back(0);
375  if (fragment_lengths[i] > 1) {
376  state_has_fragments = true;
377  }
378  for (int j = 0; j < fragment_lengths[i]; ++j) {
379  expanded_fragment_lengths_str += fragment_lengths[i];
380  }
381  }
382  } else {
383  for (int i = 0; i <= search_state[0]; ++i) {
384  expanded_fragment_lengths_str += (char)1;
385  *char_choices += NULL;
386  word->best_state.push_back(0);
387  }
388  }
389 
390  // Set up variables for concatenating fragments.
391  const char *word_lengths_ptr = NULL;
392  const char *word_ptr = NULL;
393  if (state_has_fragments) {
394  // Make word_lengths_ptr point to the last element in
395  // best_choice->unichar_lengths().
396  word_lengths_ptr = word->best_choice->unichar_lengths().string();
397  word_lengths_ptr += (strlen(word_lengths_ptr)-1);
398  // Make word_str point to the beginning of the last
399  // unichar in best_choice->unichar_string().
400  word_ptr = word->best_choice->unichar_string().string();
401  word_ptr += (strlen(word_ptr)-*word_lengths_ptr);
402  }
403  const char *expanded_fragment_lengths =
404  expanded_fragment_lengths_str.string();
405  char unichar[UNICHAR_LEN + 1];
406 
407  // Populate char_choices list such that it corresponds to search_state.
408  //
409  // If we are rebuilding a state that contains character fragments:
410  // -- combine blobs that belong to character fragments
411  // -- re-classify the blobs to obtain choices list for the merged blob
412  // -- ensure that correct classification appears in the new choices list
413  // NOTE: a choice composed form original fragment choices will be always
414  // added to the new choices list for each character composed from
415  // fragments (even if the choice for the corresponding character appears
416  // in the re-classified choices list of for the newly merged blob).
417  int ss_index = search_state[0];
418  // Which index is which?
419  // char_choices_index refers to the finished product: there is one for each
420  // blob/unicharset entry in the final word.
421  // ss_index refers to the search_state, and indexes a group (chunk) of blobs
422  // that were classified together for the best state.
423  // old_choice_index is a copy of ss_index, and accesses the old_choices,
424  // which correspond to chunks in the best state. old_choice_index gets
425  // set to -1 on a fragment set, as there is no corresponding chunk in
426  // the best state.
427  // x and y refer to the underlying blobs and are the first and last blob
428  // indices in a chunk.
429  for (int char_choices_index = char_choices->length() - 1;
430  char_choices_index >= 0;
431  --char_choices_index) {
432  // The start and end of the blob to rebuild.
433  int true_x = x;
434  int true_y = y;
435  // The fake merged fragment choice.
436  BLOB_CHOICE* merged_choice = NULL;
437  // Test for and combine fragments first.
438  int fragment_pieces = expanded_fragment_lengths[ss_index];
439  int old_choice_index = ss_index;
440 
441  if (fragment_pieces > 1) {
442  strncpy(unichar, word_ptr, *word_lengths_ptr);
443  unichar[*word_lengths_ptr] = '\0';
444  merged_choice = rebuild_fragments(unichar, expanded_fragment_lengths,
445  old_choice_index, old_choices);
446  old_choice_index = -1;
447  }
448  while (fragment_pieces > 0) {
449  true_x = x;
450  // Move left to the previous blob.
451  y = x - 1;
452  x = y - search_state[ss_index--];
453  --fragment_pieces;
454  }
455  word->best_state[char_choices_index] = true_y + 1 - true_x;
456  BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify(
457  word, true_x, true_y, old_choice_index, ratings, old_choices);
458  if (merged_choice != NULL) {
459  // Insert merged_blob into current_choices, such that current_choices
460  // are still sorted in non-descending order by rating.
461  ASSERT_HOST(!current_choices->empty());
462  BLOB_CHOICE_IT choice_it(current_choices);
463  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() &&
464  merged_choice->rating() > choice_it.data()->rating();
465  choice_it.forward());
466  choice_it.add_before_stay_put(merged_choice);
467  }
468  // Get rid of fragments in current_choices.
469  BLOB_CHOICE_IT choice_it(current_choices);
470  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
471  choice_it.forward()) {
473  choice_it.data()->unichar_id())) {
474  delete choice_it.extract();
475  }
476  }
477  char_choices->set(current_choices, char_choices_index);
478 
479  // Update word_ptr and word_lengths_ptr.
480  if (word_lengths_ptr != NULL && word_ptr != NULL) {
481  word_lengths_ptr--;
482  word_ptr -= (*word_lengths_ptr);
483  }
484  }
485  old_choices->delete_data_pointers();
486  delete old_choices;
487  memfree(search_state);
488 
489  return char_choices;
490 }
491 
499 void Wordrec::expand_node(FLOAT32 worst_priority,
500  CHUNKS_RECORD *chunks_record,
501  SEARCH_RECORD *the_search) {
502  STATE old_state;
503  int x;
504  uinT32 mask = 1 << (the_search->num_joints - 1 - 32);
505 
506  old_state.part1 = the_search->this_state->part1;
507  old_state.part2 = the_search->this_state->part2;
508 
509  // We need to expand the search more intelligently, or we get stuck
510  // with a bad starting segmentation in a long word sequence as in CJK.
511  // Expand a child node only if it is within the global bound, and no
512  // worse than 2x of its parent.
513  // TODO(dsl): There is some redudency here in recomputing the priority,
514  // and in filtering of old_merit and worst_priority.
515  the_search->this_state->part2 = old_state.part2;
516  for (x = the_search->num_joints; x > 32; x--) {
517  the_search->this_state->part1 = mask ^ old_state.part1;
518  if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
519  FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
520  if (new_merit < worst_priority) {
521  if (wordrec_debug_level > 1)
522  log_state("Pushing segstate", the_search->num_joints,
523  the_search->this_state, new_merit);
524  push_queue(the_search->open_states, the_search->this_state,
525  worst_priority, new_merit, wordrec_debug_level > 1);
526  } else {
527  if (wordrec_debug_level > 1)
528  log_state("Ignore weak segstate", the_search->num_joints,
529  the_search->this_state, new_merit);
530  }
531  }
532  mask >>= 1;
533  }
534 
535  if (the_search->num_joints > 32) {
536  mask = 1 << 31;
537  }
538  else {
539  mask = 1 << (the_search->num_joints - 1);
540  }
541 
542  the_search->this_state->part1 = old_state.part1;
543  while (x--) {
544  the_search->this_state->part2 = mask ^ old_state.part2;
545  if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
546  FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
547  if (new_merit < worst_priority) {
548  if (wordrec_debug_level > 1)
549  log_state("Pushing segstate", the_search->num_joints,
550  the_search->this_state, new_merit);
551  push_queue(the_search->open_states, the_search->this_state,
552  worst_priority, new_merit, wordrec_debug_level > 1);
553  } else {
554  if (wordrec_debug_level > 1)
555  log_state("Ignoring weak segstate", the_search->num_joints,
556  the_search->this_state, new_merit);
557  }
558  }
559  mask >>= 1;
560  }
561 }
562 
569  int num_joints,
570  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
571  WERD_CHOICE *best_choice,
572  WERD_CHOICE *raw_choice,
573  STATE *state) {
574  SEARCH_RECORD *this_search;
575 
576  this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));
577 
578  this_search->open_states = MakeHeap (wordrec_num_seg_states * 20);
579  this_search->closed_states = new_hash_table();
580 
581  if (state)
582  this_search->this_state = new_state (state);
583  else
584  cprintf ("error: bad initial state in new_search\n");
585 
586  this_search->first_state = new_state (this_search->this_state);
587  this_search->best_state = new_state (this_search->this_state);
588 
589  this_search->best_choice = best_choice;
590  this_search->raw_choice = raw_choice;
591  this_search->best_char_choices = best_char_choices;
592 
593  this_search->num_joints = num_joints;
594  this_search->num_states = 0;
595  this_search->before_best = 0;
596  this_search->segcost_bias = 0;
597 
598  return (this_search);
599 }
600 
608  HEAPENTRY entry;
609 
610  if (GetTopOfHeap (queue, &entry) == TESS_HEAP_OK) {
611 #ifndef GRAPHICS_DISABLED
613  cprintf ("eval state: %8.3f ", entry.Key);
614  print_state ("", (STATE *) entry.Data, num_joints);
615  }
616 #endif
617  return ((STATE *) entry.Data);
618  }
619  else {
620  return (NULL);
621  }
622 }
623 
629 void Wordrec::push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
630  FLOAT32 priority, bool debug) {
631  HEAPENTRY entry;
632 
633  if (priority < worst_priority) {
634  if (SizeOfHeap (queue) >= MaxSizeOfHeap(queue)) {
635  if (debug) tprintf("Heap is Full\n");
636  return;
637  }
638  entry.Data = (char *) new_state (state);
639  num_pushed++;
640  entry.Key = priority;
641  HeapStore(queue, &entry);
642  }
643 }
644 
652  SEARCH_STATE state) {
653  WIDTH_RECORD *width_record;
654  int num_blobs;
655  int i;
656 
657  free_widths (chunks_record->char_widths);
658 
659  num_blobs = state[0] + 1;
660  width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
661  width_record->num_chars = num_blobs;
662 
663  for (i = 0; i < num_blobs; i++) {
664 
665  width_record->widths[2 * i] = last_segmentation[i].width;
666 
667  if (i + 1 < num_blobs)
668  width_record->widths[2 * i + 1] = last_segmentation[i].gap;
669  }
670  chunks_record->char_widths = width_record;
671 }
672 
673 // Creates a fake blob choice from the combination of the given fragments.
674 // unichar is the class to be made from the combination,
675 // expanded_fragment_lengths[choice_index] is the number of fragments to use.
676 // old_choices[choice_index] has the classifier output for each fragment.
677 // choice index initially indexes the last fragment and should be decremented
678 // expanded_fragment_lengths[choice_index] times to get the earlier fragments.
679 // Guarantees to return something non-null, or abort!
681  const char* unichar,
682  const char* expanded_fragment_lengths,
683  int choice_index,
684  BLOB_CHOICE_LIST_VECTOR *old_choices) {
685  float rating = 0.0f;
686  float certainty = 0.0f;
687  inT16 min_xheight = -MAX_INT16;
688  inT16 max_xheight = MAX_INT16;
689  for (int fragment_pieces = expanded_fragment_lengths[choice_index] - 1;
690  fragment_pieces >= 0; --fragment_pieces, --choice_index) {
691  // Get a pointer to the classifier results from the old_choices.
692  BLOB_CHOICE_LIST *current_choices = old_choices->get(choice_index);
693  // Populate fragment with updated values and look for the
694  // fragment with the same values in current_choices.
695  // Update rating and certainty of the character being composed.
696  CHAR_FRAGMENT fragment;
697  fragment.set_all(unichar, fragment_pieces,
698  expanded_fragment_lengths[choice_index], false);
699  BLOB_CHOICE_IT choice_it(current_choices);
700  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
701  choice_it.forward()) {
702  BLOB_CHOICE* choice = choice_it.data();
703  const CHAR_FRAGMENT *current_fragment =
705  if (current_fragment && fragment.equals(current_fragment)) {
706  rating += choice->rating();
707  if (choice->certainty() < certainty) {
708  certainty = choice->certainty();
709  }
710  IntersectRange(choice->min_xheight(), choice->max_xheight(),
711  &min_xheight, &max_xheight);
712  break;
713  }
714  }
715  if (choice_it.cycled_list()) {
716  print_ratings_list("Failure", current_choices, unicharset);
717  tprintf("Failed to find fragment %s at index=%d\n",
718  fragment.to_string().string(), choice_index);
719  }
720  ASSERT_HOST(!choice_it.cycled_list()); // Be sure we found the fragment.
721  }
722  return new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),
723  rating, certainty, -1, -1, 0,
724  min_xheight, max_xheight, false);
725 }
726 
727 // Creates a joined copy of the blobs between x and y (inclusive) and
728 // inserts as the first blob at word->rebuild_word->blobs.
729 // Returns a deep copy of the classifier results for the blob.
731  WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings,
732  BLOB_CHOICE_LIST_VECTOR *old_choices) {
733  // Join parts to make the blob if needed.
734  if (x != y)
735  join_pieces(word->chopped_word->blobs, word->seam_array, x, y);
736  TBLOB *blob = word->chopped_word->blobs;
737  for (int i = 0; i < x; i++) {
738  blob = blob->next;
739  }
740  // Deep copy this blob into the output word.
741  TBLOB* copy_blob = new TBLOB(*blob);
742  copy_blob->next = word->rebuild_word->blobs;
743  word->rebuild_word->blobs = copy_blob;
744 
745  BLOB_CHOICE_LIST *choices = NULL;
746  // First check to see if we can look up the classificaiton
747  // in old_choices (if there is no need to merge blobs).
748  if (choice_index >= 0 && old_choices != NULL) {
749  choices = old_choices->get(choice_index);
750  old_choices->set(NULL, choice_index);
751  }
752  // The ratings matrix filled in by the associator will contain the next most
753  // up-to-date classification info. Thus we look up the classification there
754  // next, and only call classify_blob() if the classification is not found.
755  if (choices == NULL && ratings != NULL) {
756  choices = ratings->get(x, y);
757  if (choices != NOT_CLASSIFIED) {
758  ratings->put(x, y, NULL);
759  }
760  }
761  // Get the choices for the blob by classification if necessary.
762  if (choices == NULL) {
763  choices = classify_blob(blob, word->denorm, "rebuild", Orange,
764  word->blamer_bundle);
765  }
766  // Undo join_pieces to restore the chopped word to its fully chopped state.
767  if (x != y)
768  break_pieces(blob, word->seam_array, x, y);
769  return choices;
770 }
771 
772 } // namespace tesseract