Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
boxword.h
Go to the documentation of this file.
1 
2 // File: boxword.h
3 // Description: Class to represent the bounding boxes of the output.
4 // Author: Ray Smith
5 // Created: Tue May 25 14:18:14 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_CSTRUCT_BOXWORD_H__
21 #define TESSERACT_CSTRUCT_BOXWORD_H__
22 
23 #include "genericvector.h"
24 #include "rect.h"
25 
26 class BLOCK;
27 class DENORM;
28 class PBLOB_LIST;
29 struct TWERD;
30 class UNICHARSET;
31 class WERD;
32 class WERD_CHOICE;
33 class WERD_RES;
34 
35 namespace tesseract {
36 
// ScriptPos tells whether a character is subscript, superscript or normal.
// SP_NORMAL is the value BoxWord::BlobPosition() returns for an index that is
// out of range.
// NOTE(review): the enumerator list was missing from this listing. Only
// SP_NORMAL is referenced elsewhere in this header; the remaining names and
// their order were restored from the surrounding comments (subscript,
// superscript, dropcaps) -- confirm against the upstream header.
enum ScriptPos {
  SP_NORMAL,
  SP_SUBSCRIPT,
  SP_SUPERSCRIPT,
  SP_DROPCAP
};
44 
45 // Class to hold an array of bounding boxes for an output word and
46 // the bounding box of the whole word.
47 class BoxWord {
48  public:
49  BoxWord();
50  explicit BoxWord(const BoxWord& src);
51  ~BoxWord();
52 
53  BoxWord& operator=(const BoxWord& src);
54 
55  void CopyFrom(const BoxWord& src);
56 
57  // Factory to build a BoxWord from a TWERD and the DENORM to switch
58  // back to original image coordinates.
59  // If the denorm is not NULL, then the output is denormalized and rotated
60  // back to the original image coordinates.
61  static BoxWord* CopyFromNormalized(const DENORM* denorm,
62  TWERD* tessword);
63 
64  // Sets up the script_pos_ member using the tessword to get the bln
65  // bounding boxes, the best_choice to get the unichars, and the unicharset
66  // to get the target positions. If small_caps is true, sub/super are not
67  // considered, but dropcaps are.
68  void SetScriptPositions(const UNICHARSET& unicharset, bool small_caps,
69  TWERD* tessword, WERD_CHOICE* best_choice);
70 
71  // Clean up the bounding boxes from the polygonal approximation by
72  // expanding slightly, then clipping to the blobs from the original_word
73  // that overlap. If not null, the block provides the inverse rotation.
74  void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
75 
76  // Merges the boxes from start to end, not including end, and deletes
77  // the boxes between start and end.
78  void MergeBoxes(int start, int end);
79 
80  // Inserts a new box before the given index.
81  // Recomputes the bounding box.
82  void InsertBox(int index, const TBOX& box);
83 
84  // Deletes the box with the given index, and shuffles up the rest.
85  // Recomputes the bounding box.
86  void DeleteBox(int index);
87 
88  // Deletes all the boxes stored in BoxWord.
89  void DeleteAllBoxes();
90 
91  // This and other putatively are the same, so call the (permanent) callback
92  // for each blob index where the bounding boxes match.
93  // The callback is deleted on completion.
94  void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
95 
96  const TBOX& bounding_box() const {
97  return bbox_;
98  }
99  const int length() const {
100  return length_;
101  }
102  const TBOX& BlobBox(int index) const {
103  return boxes_[index];
104  }
105  ScriptPos BlobPosition(int index) const {
106  if (index < 0 || index >= script_pos_.size())
107  return SP_NORMAL;
108  return script_pos_[index];
109  }
110 
111  private:
112  void ComputeBoundingBox();
113 
114  TBOX bbox_;
115  int length_;
116  GenericVector<TBOX> boxes_;
117  GenericVector<ScriptPos> script_pos_;
118 };
119 
120 } // namespace tesseract.
121 
122 
123 #endif // TESSERACT_CSTRUCT_BOXWORD_H__