Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pageiterator.h
Go to the documentation of this file.
1 
2 // File: pageiterator.h
3 // Description: Iterator for tesseract page structure that avoids using
4 // tesseract internal data structures.
5 // Author: Ray Smith
6 // Created: Fri Feb 26 11:01:06 PST 2010
7 //
8 // (C) Copyright 2010, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__
22 #define TESSERACT_CCMAIN_PAGEITERATOR_H__
23 
24 #include "publictypes.h"
25 #include "platform.h"
26 
27 class C_BLOB_IT;
28 class PBLOB_IT;
29 class PAGE_RES;
30 class PAGE_RES_IT;
31 class WERD;
32 struct Pix;
33 
34 namespace tesseract {
35 
36 class Tesseract;
37 
52  public:
68  int scale, int scaled_yres,
69  int rect_left, int rect_top,
70  int rect_width, int rect_height);
// Virtual destructor, so an object of a derived iterator type can be
// destroyed correctly through a PageIterator pointer.
71  virtual ~PageIterator();
72 
// Copy constructor: builds a new PageIterator from src, which is taken by
// const reference and therefore not modified.
// NOTE(review): whether the copy is deep or shares state with src is not
// visible in this declaration - confirm in the implementation.
79  PageIterator(const PageIterator& src);
// Copy assignment from src. Note the return type is a *const* reference,
// so the result of an assignment expression cannot be used to mutate the
// iterator (e.g. (a = b).Begin() will not compile).
80  const PageIterator& operator=(const PageIterator& src);
81 
// Const query taking a pointer to a (tesseract-internal) PAGE_RES_IT.
// Judging by the name, returns true when this iterator and *other refer to
// the same word - NOTE(review): confirm against the implementation,
// including how a NULL `other` is treated.
83  bool PositionedAtSameWord(const PAGE_RES_IT* other) const;
84 
85  // ============= Moving around within the page ============.
86 
// Virtual, non-const: changes the iterator state and may be overridden by
// subclasses. Presumably repositions the iterator at the start of the page
// - TODO confirm.
91  virtual void Begin();
92 
// Virtual, non-const: changes the iterator state. Presumably moves the
// iterator back to the start of the paragraph it is currently in
// - TODO confirm.
98  virtual void RestartParagraph();
99 
// Const, non-virtual query. By its name, reports whether the current
// position lies in the first text line of its paragraph
// - NOTE(review): confirm exact semantics in the implementation.
104  bool IsWithinFirstTextlineOfParagraph() const;
105 
// Virtual, non-const: changes the iterator state. Presumably moves the
// iterator back to the start of the current row (text line)
// - TODO confirm.
111  virtual void RestartRow();
112 
// Virtual, non-const: advances the iterator at the granularity given by
// `level` (a PageIteratorLevel, declared outside this file).
// Returns a bool - presumably false when there is nothing further to move
// to; TODO confirm the exact meaning of the return value.
124  virtual bool Next(PageIteratorLevel level);
125 
// Virtual const query; does not move the iterator. By its name, reports
// whether the current position is at the start of an element of the given
// `level` - NOTE(review): confirm in the implementation.
139  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
140 
// Virtual const query taking two levels. Presumably reports whether the
// current `element`-level item is the last one inside its enclosing
// `level`-level item - NOTE(review): the roles of the two parameters are
// inferred from their names only; confirm.
157  virtual bool IsAtFinalElement(PageIteratorLevel level,
158  PageIteratorLevel element) const;
159 
// Const comparison against another PageIterator; neither iterator is
// modified. Returns an int - presumably a three-way result (negative,
// zero, positive) ordering the two positions; TODO confirm the sign
// convention and any precondition (e.g. both iterating the same page).
166  int Cmp(const PageIterator &other) const;
167 
168  // ============= Accessing data ==============.
169  // Coordinate system:
170  // Integer coordinates are at the cracks between the pixels.
171  // The top-left corner of the top-left pixel in the image is at (0,0).
172  // The bottom-right corner of the bottom-right pixel in the image is at
173  // (width, height).
174  // Every bounding box goes from the top-left of the top-left contained
175  // pixel to the bottom-right of the bottom-right contained pixel, so
176  // the bounding box of the single top-left pixel in the image is:
177  // (0,0)->(1,1).
178  // If an image rectangle has been set in the API, then returned coordinates
179  // relate to the original (full) image, rather than the rectangle.
180 
// Const query that writes a rectangle through the four out-pointers
// left/top/right/bottom for the current item at `level`.
// The coordinate-system comment preceding this declaration states that
// boxes run from the top-left of the top-left contained pixel to the
// bottom-right of the bottom-right contained pixel, relative to the
// original (full) image.
// Returns a bool - presumably false when no box is available;
// NOTE(review): confirm, and confirm whether NULL out-pointers are allowed.
190  bool BoundingBox(PageIteratorLevel level,
191  int* left, int* top, int* right, int* bottom) const;
// Same signature as BoundingBox: writes a rectangle for the current item
// at `level` through left/top/right/bottom and returns a bool.
// NOTE(review): how the result differs from BoundingBox (the name suggests
// an internal coordinate space) is not visible in this declaration -
// confirm in the implementation before relying on it.
197  bool BoundingBoxInternal(PageIteratorLevel level,
198  int* left, int* top, int* right, int* bottom) const;
199 
// Const query. By its name, returns true when there is no item at the
// given `level` at the current position - NOTE(review): confirm.
201  bool Empty(PageIteratorLevel level) const;
202 
// Const query returning a PolyBlockType (declared outside this file) -
// presumably the type of the block at the current position; TODO confirm.
207  PolyBlockType BlockType() const;
208 
// Const query returning a Pix* for the current item at `level`; by its
// name, a binary (1 bit per pixel) image.
// NOTE(review): ownership of the returned Pix and the NULL-return
// conditions are not stated in this declaration - confirm whether the
// caller must destroy the result.
215  Pix* GetBinaryImage(PageIteratorLevel level) const;
216 
// Const query returning a Pix* for the current item at `level`.
// `padding` is an int - presumably extra pixels included around the item.
// `left` and `top` are out-pointers - presumably the position of the
// returned image within the page image.
// NOTE(review): all parameter meanings above are inferred from names;
// confirm them, along with ownership of the returned Pix.
227  Pix* GetImage(PageIteratorLevel level, int padding,
228  int* left, int* top) const;
229 
// Const query that writes two points, (x1,y1) and (x2,y2), through its
// out-pointers for the current item at `level` - presumably the endpoints
// of the item's text baseline. Returns a bool, presumably false when no
// baseline is available. NOTE(review): confirm both.
236  bool Baseline(PageIteratorLevel level,
237  int* x1, int* y1, int* x2, int* y2) const;
238 
// Const query with four out-pointers and no return value: page
// orientation, writing direction, text-line order (all tesseract:: enums
// declared outside this file) and a float deskew angle.
// NOTE(review): the unit and sign convention of *deskew_angle, and which
// page element the values describe, are not visible here - confirm.
247  void Orientation(tesseract::Orientation *orientation,
248  tesseract::WritingDirection *writing_direction,
249  tesseract::TextlineOrder *textline_order,
250  float *deskew_angle) const;
251 
// Const query with four out-pointers and no return value, presumably
// describing the paragraph at the current position: its justification,
// whether it is a list item, whether it is a "crown", and the indent of
// its first line.
// NOTE(review): the meaning of "crown" and the unit of *first_line_indent
// are not visible in this declaration - confirm in the implementation.
280  void ParagraphInfo(tesseract::ParagraphJustification *justification,
281  bool *is_list_item,
282  bool *is_crown,
283  int *first_line_indent) const;
284 
285  protected:
// Protected, non-const helper marked TESS_LOCAL (a macro not defined in
// this file; presumably from platform.h and presumably restricting symbol
// visibility - confirm). Takes an int `offset`; presumably sets up
// iteration within the current word starting at that offset - TODO confirm.
290  TESS_LOCAL void BeginWord(int offset);
291 
// Pointer to a C_BLOB_IT (forward-declared at the top of this header).
// NOTE(review): ownership and lifetime are not visible here - confirm.
315  C_BLOB_IT* cblob_it_;
// Integer scale value; presumably stores the constructor's `scale`
// argument - TODO confirm.
317  int scale_;
323 };
324 
325 } // namespace tesseract.
326 
327 #endif // TESSERACT_CCMAIN_PAGEITERATOR_H__