Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::WordListLangModel Class Reference

#include <word_list_lang_model.h>

Inheritance diagram for tesseract::WordListLangModel:
tesseract::LangModel

List of all members.

Public Member Functions

 WordListLangModel (CubeRecoContext *cntxt)
 ~WordListLangModel ()
LangModEdge * Root ()
LangModEdge ** GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
bool IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **edges)
bool IsLeadingPunc (char_32 ch)
bool IsTrailingPunc (char_32 ch)
bool IsDigit (char_32 ch)
bool AddString (const char *char_ptr)
bool AddString32 (const char_32 *char_32_ptr)
- Public Member Functions inherited from tesseract::LangModel
 LangModel ()
virtual ~LangModel ()
bool OOD ()
bool Numeric ()
bool WordList ()
bool Punc ()
void SetOOD (bool ood)
void SetNumeric (bool numeric)
void SetWordList (bool word_list)
void SetPunc (bool punc_enabled)

Static Public Member Functions

static void WordVariants (const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants)

Additional Inherited Members

- Protected Attributes inherited from tesseract::LangModel
bool ood_enabled_
bool numeric_enabled_
bool word_list_enabled_
bool punc_enabled_

Detailed Description

Definition at line 39 of file word_list_lang_model.h.


Constructor & Destructor Documentation

tesseract::WordListLangModel::WordListLangModel ( CubeRecoContext *  cntxt)
explicit

Definition at line 29 of file word_list_lang_model.cpp.

{
  // The model starts out uninitialized with no DAWG attached; AddString()
  // and GetEdges() call Init() on first use while init_ is still false.
  init_ = false;
  dawg_ = NULL;
  // Store the context pointer passed in by the caller.
  cntxt_ = cntxt;
}
tesseract::WordListLangModel::~WordListLangModel ( )

Definition at line 35 of file word_list_lang_model.cpp.

{
// All teardown is delegated to Cleanup().
Cleanup();
}

Member Function Documentation

bool tesseract::WordListLangModel::AddString ( const char *  char_ptr)

Definition at line 167 of file word_list_lang_model.cpp.

{
  // Adds a UTF-8 string to the word list. Returns false when initialization
  // fails, when char_ptr is NULL, or when the string converts to an empty
  // UTF-32 string; otherwise returns the result of AddString32().
  if (!init_ && !Init()) {  // initialize if necessary
    return false;
  }
  // Reject a null input instead of handing it to the converter; this mirrors
  // the null check AddString32() performs on its own argument.
  if (char_ptr == NULL) {
    return false;
  }
  // Convert the UTF-8 input to UTF-32.
  string_32 str32;
  CubeUtils::UTF8ToUTF32(char_ptr, &str32);
  // Nothing to add if the conversion produced no characters.
  if (str32.length() < 1) {
    return false;
  }
  return AddString32(str32.c_str());
}
bool tesseract::WordListLangModel::AddString32 ( const char_32 *  char_32_ptr)

Definition at line 181 of file word_list_lang_model.cpp.

{
  // Adds a UTF-32 string to the word list. Returns false for a NULL string or
  // when initialization fails; returns true otherwise, including the case
  // where no word variant was produced and nothing was added.
  if (char_32_ptr == NULL) {
    return false;
  }
  // Initialize if necessary, as AddString() and GetEdges() do. The
  // constructor leaves dawg_ NULL, so it must not be dereferenced on an
  // uninitialized model (presumably Init() creates the DAWG -- confirm).
  if (!init_ && !Init()) {
    return false;
  }
  // get all the word variants
  vector<WERD_CHOICE *> word_variants;
  WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
               char_32_ptr, &word_variants);
  // Read the size once as an int so the loops below do not mix signed and
  // unsigned operands in their comparisons.
  const int variant_cnt = static_cast<int>(word_variants.size());
  if (variant_cnt > 0) {
    // find the shortest variant (the earliest one wins ties)
    int shortest_word = 0;
    for (int word = 1; word < variant_cnt; word++) {
      if (word_variants[shortest_word]->length() >
          word_variants[word]->length()) {
        shortest_word = word;
      }
    }
    // only add the shortest grapheme interpretation of string to the word list
    dawg_->add_word_to_dawg(*word_variants[shortest_word]);
  }
  // Free every variant returned through the vector.
  for (int i = 0; i < variant_cnt; i++) {
    delete word_variants[i];
  }
  return true;
}
LangModEdge ** tesseract::WordListLangModel::GetEdges ( CharAltList *  alt_list,
LangModEdge *  edge,
int *  edge_cnt 
)
virtual

Implements tesseract::LangModel.

Definition at line 71 of file word_list_lang_model.cpp.

{
  // Returns a newly allocated array (capacity kMaxEdge) holding the edges
  // that follow `edge`, and stores how many were created in *edge_cnt.
  // Returns NULL, with *edge_cnt == 0, when initialization fails or when
  // next_node() yields 0 for the given edge.
  // NOTE(review): the caller presumably owns the returned array -- confirm.

  // Zero the count first so every early return leaves it defined.
  (*edge_cnt) = 0;
  // initialize if necessary
  if (!init_ && !Init()) {
    // The return type is a pointer, so report failure as NULL, not `false`.
    return NULL;
  }
  EDGE_REF edge_ref;
  TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
  if (tess_lm_edge == NULL) {
    // No parent edge was supplied: start from edge reference 0.
    edge_ref = 0;
  } else {
    edge_ref = tess_lm_edge->EndEdge();
    // advance node
    edge_ref = dawg_->next_node(edge_ref);
    if (edge_ref == 0) {
      return NULL;
    }
  }
  // allocate memory for edges
  LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
  if (edge_array == NULL) {
    return NULL;
  }
  // now get all the emerging edges
  (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
                                                 edge_array + (*edge_cnt));
  return edge_array;
}
bool tesseract::WordListLangModel::IsDigit ( char_32  ch)
inline, virtual

Implements tesseract::LangModel.

Definition at line 58 of file word_list_lang_model.h.

{ return false; } // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsLeadingPunc ( char_32  ch)
inline, virtual

Implements tesseract::LangModel.

Definition at line 56 of file word_list_lang_model.h.

{ return false; } // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsTrailingPunc ( char_32  ch)
inline, virtual

Implements tesseract::LangModel.

Definition at line 57 of file word_list_lang_model.h.

{ return false; } // not yet implemented: always returns false, ch is ignored
bool tesseract::WordListLangModel::IsValidSequence ( const char_32 *  sequence,
bool  eow_flag,
LangModEdge **  edges 
)
virtual

Implements tesseract::LangModel.

Definition at line 114 of file word_list_lang_model.cpp.

{
// Stub: always returns false; none of the arguments are examined.
return false;
}
LangModEdge * tesseract::WordListLangModel::Root ( )
virtual

Implements tesseract::LangModel.

Definition at line 66 of file word_list_lang_model.cpp.

{
// This model always returns NULL as its root edge.
return NULL;
}
void tesseract::WordListLangModel::WordVariants ( const CharSet &  char_set,
const UNICHARSET *  uchset,
string_32  str32,
vector< WERD_CHOICE * > *  word_variants 
)
static

Definition at line 154 of file word_list_lang_model.cpp.

{
  // Free every element the caller's vector currently holds, then empty it.
  for (vector<WERD_CHOICE *>::iterator it = word_variants->begin();
       it != word_variants->end(); ++it) {
    delete *it;
  }
  word_variants->clear();
  // Start from an empty prefix string and a WERD_CHOICE built on uchset,
  // then delegate to the five-argument WordVariants overload.
  WERD_CHOICE word_so_far(uchset);
  string_32 prefix_str32;
  WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
}

The documentation for this class was generated from the following files: