21 #pragma warning(disable:4244) // Conversion warnings
// Fragment: opens the combined data file named by data_file_name and loads
// its header (entry count followed by a table of inT64 offsets) into
// actual_tessdata_num_entries_ and offset_table_.
// NOTE(review): this excerpt is not contiguous -- the enclosing signature is
// not visible and braces opened below are not closed in the visible text.
37 debug_level_ = debug_level;
// Binary mode: the header is read back as raw inT32/inT64 values.
38 data_file_ = fopen(data_file_name,
"rb");
39 if (data_file_ ==
NULL) {
40 tprintf(
"Error opening data file %s\n", data_file_name);
41 tprintf(
"Please make sure the TESSDATA_PREFIX environment variable is set "
42 "to the parent directory of your \"tessdata\" directory.\n");
// Read the number of entries stored in the file.
// NOTE(review): the fread result is discarded, so a short read is not
// detected by this statement.
45 fread(&actual_tessdata_num_entries_,
sizeof(
inT32), 1, data_file_);
// swap_ is set when the count just read exceeds kMaxNumTessdataEntries.
// Presumably an implausibly large count means the file was written with the
// opposite byte order -- TODO confirm.
46 swap_ = (actual_tessdata_num_entries_ > kMaxNumTessdataEntries);
// NOTE(review): presumably executed only when swap_ is set; the guarding
// condition is not visible in this excerpt.
48 actual_tessdata_num_entries_ =
reverse32(actual_tessdata_num_entries_);
// Read one inT64 offset per entry into offset_table_.
// NOTE(review): the element count comes from the file and the fread result
// is discarded; any bounds check against the capacity of offset_table_ would
// have to be in lines not shown here -- verify.
51 fread(offset_table_,
sizeof(
inT64),
52 actual_tessdata_num_entries_, data_file_);
// Apply reverse64 to every offset (presumably a byte swap, and presumably
// only when swap_ is set -- the guard is not visible).
54 for (i = 0 ; i < actual_tessdata_num_entries_; ++i) {
55 offset_table_[i] =
reverse64(offset_table_[i]);
// Diagnostic dump of the loaded header: entry count, then each offset.
59 tprintf(
"TessdataManager loaded %d types of tesseract data files.\n",
60 actual_tessdata_num_entries_);
61 for (i = 0; i < actual_tessdata_num_entries_; ++i) {
// NOTE(review): %lld assumes inT64 is passed as long long -- confirm against
// the typedef.
62 tprintf(
"Offset for type %d is %lld\n", i, offset_table_[i]);
// Fragment: tail of a parameter list followed by a chunked file-copy loop
// from input_file to output_file.
// num_bytes_to_copy == 0 returns immediately; a positive value limits the
// copy to exactly that many bytes; any other (negative) value skips the
// limit checks below, so copying continues until fread returns 0.
// NOTE(review): the start of the signature, the declarations of bytes_read
// and last_char, and the closing braces are not visible in this excerpt.
69 bool newline_end,
inT64 num_bytes_to_copy) {
70 if (num_bytes_to_copy == 0)
return;
// Default chunk size; shrunk when the requested byte count is smaller.
71 int buffer_size = 1024;
72 if (num_bytes_to_copy > 0 && buffer_size > num_bytes_to_copy) {
// Narrowing inT64 -> int is safe here: the value is below buffer_size.
73 buffer_size = num_bytes_to_copy;
75 inT64 num_bytes_copied = 0;
// NOTE(review): the matching delete[] for chunk is not visible in this
// excerpt -- confirm it is released on every exit path.
76 char *chunk =
new char[buffer_size];
// Loop ends when fread returns 0 (end of file or read error).
79 while ((bytes_read = fread(chunk,
sizeof(
char),
80 buffer_size, input_file))) {
// NOTE(review): the fwrite result is discarded, so a short write goes
// unnoticed.
81 fwrite(chunk,
sizeof(
char), bytes_read, output_file);
// Remember the last byte copied so far (presumably used for the
// newline_end handling, which is not visible here).
82 last_char = chunk[bytes_read-1];
83 if (num_bytes_to_copy > 0) {
84 num_bytes_copied += bytes_read;
// Stop once exactly the requested number of bytes has been copied.
85 if (num_bytes_copied == num_bytes_to_copy)
break;
// Shrink the next read so it cannot run past the requested byte count.
86 if (num_bytes_copied + buffer_size > num_bytes_to_copy) {
87 buffer_size = num_bytes_to_copy - num_bytes_copied;
// Fragment: rewinds output_file to its start and writes the entry count as
// an inT32, then prints the offsets.
// NOTE(review): the enclosing signature, the write of the offset table and
// the loop around the per-type message are not visible in this excerpt.
96 fseek(output_file, 0, SEEK_SET);
// NOTE(review): the fwrite result is discarded.
98 fwrite(&num_entries,
sizeof(
inT32), 1, output_file);
102 tprintf(
"TessdataManager combined tesseract data files.\n");
// NOTE(review): %lld assumes offset_table[i] is passed as long long --
// confirm against the element type.
104 tprintf(
"Offset for type %d is %lld\n", i, offset_table[i]);
// Fragment: builds one combined output file from per-component files whose
// names end in the kTessdataFileSuffixes entries, recording in offset_table
// the position at which each component starts.
// NOTE(review): the start of the signature, the loop headers and the
// closing braces are not visible in this excerpt.
109 const char *language_data_path_prefix,
110 const char *output_filename) {
114 FILE *output_file = fopen(output_filename,
"wb");
115 if (output_file ==
NULL) {
116 tprintf(
"Error opening %s for writing\n", output_filename);
// Size of one inT32 plus TESSDATA_NUM_ENTRIES inT64 values, used with
// SEEK_SET -- presumably an fseek on output_file that skips the space
// reserved for the header; the start of the call is not visible.
121 sizeof(
inT32) +
sizeof(
inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
124 bool text_file =
false;
// Tail of a call that receives the i-th suffix and fills in type and
// text_file (the callee name is not visible here).
130 kTessdataFileSuffixes[i], &type, &text_file));
// Append the i-th suffix to filename and try to open that component.
132 filename += kTessdataFileSuffixes[i];
133 file_ptr[i] = fopen(filename.
string(),
"rb");
134 if (file_ptr[i] !=
NULL) {
// Record where this component starts in the output, then copy the whole
// input file (-1 disables the byte limit in CopyFile).
135 offset_table[type] = ftell(output_file);
136 CopyFile(file_ptr[i], output_file, text_file, -1);
// Error reports; the conditions that trigger them are not visible in this
// excerpt.
143 tprintf(
"Error opening unicharset file\n");
150 tprintf(
"Error opening pffmtable and/or normproto files"
151 " while inttemp file was present\n");
// Fragment: writes a new combined data file to new_traineddata_filename,
// taking components from component_filenames where a replacement file was
// opened and otherwise copying from the already-open data_file_.
// NOTE(review): the start of the signature, several loop headers and the
// closing braces are not visible in this excerpt.
161 const char *new_traineddata_filename,
162 char **component_filenames,
163 int num_new_components) {
167 bool text_file =
false;
// -1 is stored as the initial offset for an entry (the loop header around
// this statement is not visible).
170 offset_table[i] = -1;
173 FILE *output_file = fopen(new_traineddata_filename,
"wb");
174 if (output_file ==
NULL) {
175 tprintf(
"Error opening %s for writing\n", new_traineddata_filename);
// Size of one inT32 plus TESSDATA_NUM_ENTRIES inT64 values, used with
// SEEK_SET -- presumably an fseek on output_file that skips the space
// reserved for the header; the start of the call is not visible.
181 sizeof(
inT32) +
sizeof(
inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
// Open each replacement component; the handle is stored under its type
// (how type is derived from the filename is not visible here).
184 for (i = 0; i < num_new_components; ++i) {
186 file_ptr[type] = fopen(component_filenames[i],
"rb");
// NOTE(review): handles above are stored by type but tested here by i.
// Presumably this i belongs to a later loop over all entry types whose
// header is not visible -- confirm it is not the num_new_components loop.
191 if (file_ptr[i] !=
NULL) {
// Replacement available: record the start offset and copy the whole file.
193 offset_table[i] = ftell(output_file);
194 CopyFile(file_ptr[i], output_file, kTessdataFileIsText[i], -1);
// Otherwise copy the component from data_file_; the byte count is an
// expression ending in "- ftell(data_file_) + 1"-style arithmetic whose
// first operand is not visible in this excerpt.
199 offset_table[i] = ftell(output_file);
200 CopyFile(data_file_, output_file, kTessdataFileIsText[i],
202 ftell(data_file_) + 1);
// Fragment: looks up suffix in kTessdataFileSuffixes by exact string
// comparison; on a match *text_file receives the matching
// kTessdataFileIsText entry.
// NOTE(review): the start of the signature, the loop header, the assignment
// to *type and the return statements are not visible in this excerpt.
212 const char *suffix,
TessdataType *type,
bool *text_file) {
214 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
216 *text_file = kTessdataFileIsText[i];
// NOTE(review): this message goes through printf, while other messages in
// this file use tprintf -- confirm whether that is intentional.
220 printf(
"TessdataManager can't determine which tessdata"
221 " component is represented by %s\n", suffix);
// Fragment: finds the text after the last '.' in filename.
// Returns false when filename has no '.' or when nothing follows the last
// '.'. After the check, suffix points one character past that '.'.
// NOTE(review): the enclosing signature and the rest of the body are not
// visible in this excerpt.
228 const char *suffix = strrchr(filename,
'.');
229 if (suffix ==
NULL || *(++suffix) ==
'\0')
return false;
// Fragment: opens filename for binary writing and copies a byte range of
// length end_offset - begin_offset + 1 (both ends inclusive) into it.
// NOTE(review): the enclosing signature, the computation of begin_offset and
// end_offset, and the start of the copy call are not visible in this excerpt.
235 bool text_file =
false;
// Tail of a call that receives filename and fills in type and text_file
// (the callee name is not visible here).
237 filename, &type, &text_file));
240 FILE *output_file = fopen(filename,
"wb");
241 if (output_file ==
NULL) {
// NOTE(review): the message below is misspelled ("openning") and is printed
// with printf, not tprintf; left unchanged because it is runtime text.
242 printf(
"Error openning %s\n", filename);
249 end_offset - begin_offset + 1);