Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

List of all members.

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
virtual ~LTRResultIterator ()
char * GetUTF8Text (PageIteratorLevel level) const
void SetLineSeparator (const char *new_line)
void SetParagraphSeparator (const char *new_para)
float Confidence (PageIteratorLevel level) const
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
const char * WordRecognitionLanguage () const
StrongScriptDirection WordDirection () const
bool WordIsFromDictionary () const
bool WordIsNumeric () const
bool HasBlamerInfo () const
void * GetParamsTrainingBundle () const
const char * GetBlamerDebug () const
const char * GetBlamerMisadaptionDebug () const
char * WordTruthUTF8Text () const
const char * WordLattice (int *lattice_size) const
bool SymbolIsSuperscript () const
bool SymbolIsSubscript () const
bool SymbolIsDropcap () const
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
virtual ~PageIterator ()
 PageIterator (const PageIterator &src)
const PageIterator & operator= (const PageIterator &src)
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
virtual void Begin ()
virtual void RestartParagraph ()
bool IsWithinFirstTextlineOfParagraph () const
virtual void RestartRow ()
virtual bool Next (PageIteratorLevel level)
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
int Cmp (const PageIterator &other) const
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool Empty (PageIteratorLevel level) const
PolyBlockType BlockType () const
Pix * GetBinaryImage (PageIteratorLevel level) const
Pix * GetImage (PageIteratorLevel level, int padding, int *left, int *top) const
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const

Protected Attributes

const char * line_separator_
const char * paragraph_separator_
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
Tesseract * tesseract_
PAGE_RES_IT * it_
WERD * word_
int word_length_
int blob_index_
C_BLOB_IT * cblob_it_
int scale_
int scaled_yres_
int rect_left_
int rect_top_
int rect_width_
int rect_height_

Friends

class ChoiceIterator

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)

Detailed Description

Definition at line 46 of file ltrresultiterator.h.


Constructor & Destructor Documentation

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES *  page_res,
Tesseract *  tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 30 of file ltrresultiterator.cpp.

: PageIterator(page_res, tesseract, scale, scaled_yres,
rect_left, rect_top, rect_width, rect_height),
}
tesseract::LTRResultIterator::~LTRResultIterator ( )
virtual

Definition at line 40 of file ltrresultiterator.cpp.

{
}

Member Function Documentation

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

{
  // Returns the mean confidence of the element at the requested level as a
  // value clamped to [0, 100]. Returns 0 when the iterator is already past
  // the last word.
  if (it_->word() == NULL) return 0.0f;  // Already at the end!
  float mean_certainty = 0.0f;
  int certainty_count = 0;
  // Walk a copy of the iterator so that it_ itself is not advanced.
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE* best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  switch (level) {
    case RIL_BLOCK:
      // Sum word certainties until the iterator leaves the current block.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block());
      break;
    case RIL_PARA:
      // Sum word certainties until the block or the paragraph changes.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block() &&
               res_it.row()->row->para() == res_it.prev_row()->row->para());
      break;
    case RIL_TEXTLINE:
      // Sum word certainties until the row changes. Without this label the
      // loop below sits after a break and can never execute.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.row() == res_it.prev_row());
      break;
    case RIL_WORD:
      mean_certainty += best_choice->certainty();
      ++certainty_count;
      break;
    case RIL_SYMBOL: {
      BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();
      if (choices != NULL) {
        // Step to the choice list that belongs to the current blob.
        BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);
        for (int blob = 0; blob < blob_index_; ++blob)
          blob_choices_it.forward();
        // Look for the entry whose unichar id equals the best choice's id at
        // this blob position.
        // NOTE(review): if nothing matches, the certainty of whichever entry
        // the iterator rests on after cycling is used - confirm intended.
        BLOB_CHOICE_IT choice_it(blob_choices_it.data());
        for (choice_it.mark_cycle_pt();
             !choice_it.cycled_list();
             choice_it.forward()) {
          if (choice_it.data()->unichar_id() ==
              best_choice->unichar_id(blob_index_))
            break;
        }
        mean_certainty += choice_it.data()->certainty();
      } else {
        // No per-blob choices: fall back to the certainty of the whole word.
        mean_certainty += best_choice->certainty();
      }
      ++certainty_count;
      break;
    }
  }
  if (certainty_count > 0) {
    mean_certainty /= certainty_count;
    // Map the mean certainty to a confidence value and clamp it to [0, 100].
    float confidence = 100 + 5 * mean_certainty;
    if (confidence < 0.0f) confidence = 0.0f;
    if (confidence > 100.0f) confidence = 100.0f;
    return confidence;
  }
  return 0.0f;
}
const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 254 of file ltrresultiterator.cpp.

{
}
const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 260 of file ltrresultiterator.cpp.

void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 247 of file ltrresultiterator.cpp.

{
return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
}
char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Reimplemented in tesseract::ResultIterator.

Definition at line 45 of file ltrresultiterator.cpp.

{
// Builds the text of the current element at the given level and returns it
// in a buffer allocated with new char[]; returns NULL when the iterator is
// already past the last word.
// NOTE(review): the buffer is presumably released by the caller with
// delete[] - confirm against the header documentation.
if (it_->word() == NULL) return NULL; // Already at the end!
STRING text;
// Walk a copy of the iterator so that it_ itself is not advanced.
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
if (level == RIL_SYMBOL) {
// Single symbol: text of the blob at blob_index_ within the current word.
text = res_it.word()->BestUTF8(blob_index_, false);
} else if (level == RIL_WORD) {
text = best_choice->unichar_string();
} else {
// Text line, paragraph or block: concatenate words, separating them with a
// space, and append the configured line / paragraph separators.
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
text += best_choice->unichar_string();
text += " ";
res_it.forward();
// The line ends when advancing moved the iterator onto a different row.
eol = res_it.row() != res_it.prev_row();
} while (!eol);
// Presumably drops the trailing space added after the last word of the line.
text.truncate_at(text.length() - 1);
text += line_separator_;
// The paragraph ends when either the block or the paragraph changed.
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
// For RIL_TEXTLINE this loop stops after the first line.
} while (level != RIL_TEXTLINE && !eop);
if (eop) text += paragraph_separator_;
// Only RIL_BLOCK continues with further paragraphs of the same block.
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
// Copy the accumulated text, including room for the terminator, into a
// freshly allocated buffer.
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
return result;
}
bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 239 of file ltrresultiterator.cpp.

{
return (it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
}
void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

{
  // Remember the string that GetUTF8Text appends at the end of a paragraph.
  // Only the pointer is stored (no copy is made), so the string has to stay
  // valid for as long as this iterator uses it.
  paragraph_separator_ = new_para;
}
bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 312 of file ltrresultiterator.cpp.

{
if (cblob_it_ == NULL && it_->word() != NULL)
return false;
}
bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 303 of file ltrresultiterator.cpp.

{
if (cblob_it_ == NULL && it_->word() != NULL)
return false;
}
bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 294 of file ltrresultiterator.cpp.

{
if (cblob_it_ == NULL && it_->word() != NULL)
return false;
}
StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 210 of file ltrresultiterator.cpp.

{
  // Classifies the current word by the direction of the characters it
  // contains. Returns DIR_NEUTRAL when the iterator is past the last word.
  if (it_->word() == NULL) return DIR_NEUTRAL;
  bool has_rtl = it_->word()->AnyRtlCharsInWord();
  bool has_ltr = it_->word()->AnyLtrCharsInWord();
  // Each test needs its own return; without them the three ifs nest and the
  // function yields DIR_MIX for every word.
  // NOTE(review): the two directional enumerator names are assumed from the
  // StrongScriptDirection enum - confirm against its declaration.
  if (has_rtl && !has_ltr)
    return DIR_RIGHT_TO_LEFT;
  if (has_ltr && !has_rtl)
    return DIR_LEFT_TO_RIGHT;
  if (!has_ltr && !has_rtl)
    return DIR_NEUTRAL;
  // Both right-to-left and left-to-right characters are present.
  return DIR_MIX;
}
const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 172 of file ltrresultiterator.cpp.

{
  // Nothing to report once the iterator has moved past the last word.
  if (it_->word() == NULL) {
    return NULL;  // Already at the end!
  }
  // Without font information only font_id is written (as -1); the remaining
  // output parameters are left untouched.
  if (it_->word()->fontinfo == NULL) {
    *font_id = -1;
    return NULL;  // No font information.
  }

  const FontInfo& info = *it_->word()->fontinfo;
  *font_id = info.universal_id;
  *is_bold = info.is_bold();
  *is_italic = info.is_italic();
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = info.is_fixed_pitch();
  *is_serif = info.is_serif();
  *is_smallcaps = it_->word()->small_caps;

  // Row height in pixels: x-height plus ascenders minus descenders.
  const float height_in_pixels = it_->row()->row->x_height() +
      it_->row()->row->ascenders() - it_->row()->row->descenders();

  // Convert from pixels to printers points; report 0 when the vertical
  // resolution is not positive.
  int points = 0;
  if (scaled_yres_ > 0) {
    points = static_cast<int>(
        height_in_pixels * kPointsPerInch / scaled_yres_ + 0.5);
  }
  *pointsize = points;
  return info.name;
}
bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 224 of file ltrresultiterator.cpp.

{
  // A missing word means the iterator is already at the end.
  if (it_->word() == NULL) {
    return false;
  }
  // The word counts as a dictionary word when its best choice was produced by
  // one of the system, frequent-word or user DAWG permuters.
  const int perm = it_->word()->best_choice->permuter();
  if (perm == SYSTEM_DAWG_PERM) return true;
  if (perm == FREQ_DAWG_PERM) return true;
  return perm == USER_DAWG_PERM;
}
bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 232 of file ltrresultiterator.cpp.

{
  // A missing word means the iterator is already at the end.
  if (it_->word() == NULL) {
    return false;
  }
  // The word is numeric when its best choice carries the number permuter.
  const int perm = it_->word()->best_choice->permuter();
  const bool is_number = (perm == NUMBER_PERM);
  return is_number;
}
const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 284 of file ltrresultiterator.cpp.

{
if (it_->word() == NULL) return NULL; // Already at the end!
if (it_->word()->blamer_bundle == NULL) return NULL;
*lattice_size = it_->word()->blamer_bundle->lattice_size;
}
const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 204 of file ltrresultiterator.cpp.

{
  // No current word: there is no language to report.
  if (it_->word() == NULL) {
    return NULL;
  }
  // The word carries no recognizer instance: likewise nothing to report.
  if (it_->word()->tesseract == NULL) {
    return NULL;
  }
  // Language string of the recognizer instance attached to the word.
  return it_->word()->tesseract->lang.string();
}
char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 266 of file ltrresultiterator.cpp.

{
if (it_->word() == NULL) return NULL; // Already at the end!
if (it_->word()->blamer_bundle == NULL ||
return NULL; // no truth information for this word
}
const GenericVector<STRING> &truth_vec =
STRING truth_text;
for (int i = 0; i < truth_vec.size(); ++i) truth_text += truth_vec[i];
int length = truth_text.length() + 1;
char* result = new char[length];
strncpy(result, truth_text.string(), length);
return result;
}

Friends And Related Function Documentation

friend class ChoiceIterator
friend

Definition at line 47 of file ltrresultiterator.h.


Member Data Documentation

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 165 of file ltrresultiterator.h.

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 166 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: