Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
protos.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: protos.c (Formerly protos.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Mon Mar 4 14:51:24 1991 (Dan Johnson) danj@hpgrlj
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "protos.h"
29 #include "const.h"
30 #include "emalloc.h"
31 #include "freelist.h"
32 #include "callcpp.h"
33 #include "tprintf.h"
34 #include "scanutils.h"
35 #include "globals.h"
36 #include "classify.h"
37 #include "params.h"
38 
39 #include <stdio.h>
40 #include <math.h>
41 
42 #define PROTO_INCREMENT 32
43 #define CONFIG_INCREMENT 16
44 
45 /*----------------------------------------------------------------------
46  V a r i a b l e s
47 ----------------------------------------------------------------------*/
49 
50 STRING_VAR(classify_training_file, "MicroFeatures", "Training file");
51 
52 /*----------------------------------------------------------------------
53  F u n c t i o n s
54 ----------------------------------------------------------------------*/
64  int NewNumConfigs;
65  int NewConfig;
66  int MaxNumProtos;
68 
69  MaxNumProtos = Class->MaxNumProtos;
70 
71  if (Class->NumConfigs >= Class->MaxNumConfigs) {
72  /* add configs in CONFIG_INCREMENT chunks at a time */
73  NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) /
75 
76  Class->Configurations =
78  sizeof (BIT_VECTOR) * NewNumConfigs);
79 
80  Class->MaxNumConfigs = NewNumConfigs;
81  }
82  NewConfig = Class->NumConfigs++;
83  Config = NewBitVector (MaxNumProtos);
84  Class->Configurations[NewConfig] = Config;
85  zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos));
86 
87  return (NewConfig);
88 }
89 
90 
100  int i;
101  int Bit;
102  int NewNumProtos;
103  int NewProto;
105 
106  if (Class->NumProtos >= Class->MaxNumProtos) {
107  /* add protos in PROTO_INCREMENT chunks at a time */
108  NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) /
110 
111  Class->Prototypes = (PROTO) Erealloc (Class->Prototypes,
112  sizeof (PROTO_STRUCT) *
113  NewNumProtos);
114 
115  Class->MaxNumProtos = NewNumProtos;
116 
117  for (i = 0; i < Class->NumConfigs; i++) {
118  Config = Class->Configurations[i];
119  Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos);
120 
121  for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++)
122  reset_bit(Config, Bit);
123  }
124  }
125  NewProto = Class->NumProtos++;
126  if (Class->NumProtos > MAX_NUM_PROTOS) {
127  tprintf("Ouch! number of protos = %d, vs max of %d!",
128  Class->NumProtos, MAX_NUM_PROTOS);
129  }
130  return (NewProto);
131 }
132 
133 
143  inT16 Pid;
144  FLOAT32 TotalLength = 0;
145 
146  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
147  if (test_bit (Config, Pid)) {
148 
149  TotalLength += (ProtoIn (Class, Pid))->Length;
150  }
151  }
152  return (TotalLength);
153 }
154 
155 
164  inT16 Pid;
165  FLOAT32 TotalLength = 0;
166 
167  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
168  TotalLength += (ProtoIn (Class, Pid))->Length;
169  }
170  return (TotalLength);
171 }
172 
173 
182 void CopyProto(PROTO Src, PROTO Dest) {
183  Dest->X = Src->X;
184  Dest->Y = Src->Y;
185  Dest->Length = Src->Length;
186  Dest->Angle = Src->Angle;
187  Dest->A = Src->A;
188  Dest->B = Src->B;
189  Dest->C = Src->C;
190 }
191 
192 
193 /**********************************************************************
194  * FillABC
195  *
196  * Fill in Protos A, B, C fields based on the X, Y, Angle fields.
197  **********************************************************************/
198 void FillABC(PROTO Proto) {
199  FLOAT32 Slope, Intercept, Normalizer;
200 
201  Slope = tan (Proto->Angle * 2.0 * PI);
202  Intercept = Proto->Y - Slope * Proto->X;
203  Normalizer = 1.0 / sqrt (Slope * Slope + 1.0);
204  Proto->A = Slope * Normalizer;
205  Proto->B = -Normalizer;
206  Proto->C = Intercept * Normalizer;
207 }
208 
209 
210 /**********************************************************************
211  * FreeClass
212  *
213  * Deallocate the memory consumed by the specified class.
214  **********************************************************************/
215 void FreeClass(CLASS_TYPE Class) {
216  if (Class) {
217  FreeClassFields(Class);
218  delete Class;
219  }
220 }
221 
222 
223 /**********************************************************************
224  * FreeClassFields
225  *
226  * Deallocate the memory consumed by subfields of the specified class.
227  **********************************************************************/
229  int i;
230 
231  if (Class) {
232  if (Class->MaxNumProtos > 0)
233  memfree (Class->Prototypes);
234  if (Class->MaxNumConfigs > 0) {
235  for (i = 0; i < Class->NumConfigs; i++)
236  FreeBitVector (Class->Configurations[i]);
237  memfree (Class->Configurations);
238  }
239  }
240 }
241 
242 /**********************************************************************
243  * NewClass
244  *
245  * Allocate a new class with enough memory to hold the specified number
246  * of prototypes and configurations.
247  **********************************************************************/
248 CLASS_TYPE NewClass(int NumProtos, int NumConfigs) {
249  CLASS_TYPE Class;
250 
251  Class = new CLASS_STRUCT;
252 
253  if (NumProtos > 0)
254  Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT));
255 
256  if (NumConfigs > 0)
257  Class->Configurations = (CONFIGS) Emalloc (NumConfigs *
258  sizeof (BIT_VECTOR));
259  Class->MaxNumProtos = NumProtos;
260  Class->MaxNumConfigs = NumConfigs;
261  Class->NumProtos = 0;
262  Class->NumConfigs = 0;
263  return (Class);
264 
265 }
266 
267 
268 /**********************************************************************
269  * PrintProtos
270  *
271  * Print the list of prototypes in this class type.
272  **********************************************************************/
273 void PrintProtos(CLASS_TYPE Class) {
274  inT16 Pid;
275 
276  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
277  cprintf ("Proto %d:\t", Pid);
278  PrintProto (ProtoIn (Class, Pid));
279  cprintf ("\t");
280  PrintProtoLine (ProtoIn (Class, Pid));
281  new_line();
282  }
283 }
284 
285 
286 namespace tesseract {
294  FILE *File;
295  char TextLine[CHARS_PER_LINE];
296  char unichar[CHARS_PER_LINE];
297 
298  cprintf ("Reading training data from '%s' ...",
299  static_cast<STRING>(classify_training_file).string());
300  fflush(stdout);
301 
302  File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
303  while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
304 
305  sscanf(TextLine, "%s", unichar);
306  ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
307  fgets(TextLine, CHARS_PER_LINE, File);
308  fgets(TextLine, CHARS_PER_LINE, File);
309  }
310  fclose(File);
311  new_line();
312 }
313 } // namespace tesseract
314 
321 void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
322  CLASS_TYPE Class;
323 
324  Class = &TrainingData[unichar_id];
325 
326  ReadProtos(File, Class);
327 
328  ReadConfigs(File, Class);
329 }
330 
337 void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
338  inT16 Cid;
339  register inT16 Wid;
340  register BIT_VECTOR ThisConfig;
341  int NumWords;
342  int NumConfigs;
343 
344  fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
345  Class->NumConfigs = NumConfigs;
346  Class->MaxNumConfigs = NumConfigs;
347  Class->Configurations =
348  (CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
349  NumWords = WordsInVectorOfSize (Class->NumProtos);
350 
351  for (Cid = 0; Cid < NumConfigs; Cid++) {
352 
353  ThisConfig = NewBitVector (Class->NumProtos);
354  for (Wid = 0; Wid < NumWords; Wid++)
355  fscanf (File, "%x", &ThisConfig[Wid]);
356  Class->Configurations[Cid] = ThisConfig;
357  }
358 }
359 
360 
367 void ReadProtos(register FILE *File, CLASS_TYPE Class) {
368  register inT16 Pid;
369  register PROTO Proto;
370  int NumProtos;
371 
372  fscanf (File, "%d\n", &NumProtos);
373  Class->NumProtos = NumProtos;
374  Class->MaxNumProtos = NumProtos;
375  Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
376 
377  for (Pid = 0; Pid < NumProtos; Pid++) {
378  Proto = ProtoIn (Class, Pid);
379  fscanf (File, "%f %f %f %f %f %f %f\n",
380  &Proto->X,
381  &Proto->Y,
382  &Proto->Length,
383  &Proto->Angle,
384  &Proto->A,
385  &Proto->B, &Proto->C);
386  }
387 }
388 
389 
399 int SplitProto(CLASS_TYPE Class, int OldPid) {
400  int i;
401  int NewPid;
403 
404  NewPid = AddProtoToClass (Class);
405 
406  for (i = 0; i < Class->NumConfigs; i++) {
407  Config = Class->Configurations[i];
408  if (test_bit (Config, OldPid))
409  SET_BIT(Config, NewPid);
410  }
411  return (NewPid);
412 }
413 
414 
425 void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
426  int Cid, Pid;
428 
429  fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
430 
431  for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
432  fprintf (File, "1 ");
433 
434  Config = Class->Configurations[Cid];
435 
436  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
437  if (test_bit (Config, Pid))
438  fprintf (File, "1");
439  else
440  fprintf (File, "0");
441  }
442  fprintf (File, "\n");
443  }
444 }
445 
446 
457 void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
458  int Pid;
459  PROTO Proto;
460 
461  /* print old header */
462  fprintf (File, "6\n");
463  fprintf (File, "linear essential -0.500000 0.500000\n");
464  fprintf (File, "linear essential -0.250000 0.750000\n");
465  fprintf (File, "linear essential 0.000000 1.000000\n");
466  fprintf (File, "circular essential 0.000000 1.000000\n");
467  fprintf (File, "linear non-essential -0.500000 0.500000\n");
468  fprintf (File, "linear non-essential -0.500000 0.500000\n");
469 
470  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
471  Proto = ProtoIn (Class, Pid);
472 
473  fprintf (File, "significant elliptical 1\n");
474  fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
475  Proto->X, Proto->Y,
476  Proto->Length, Proto->Angle, 0.0, 0.0);
477  fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
478  0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
479  }
480 }