Lucene++ - a full-featured C++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
CharTokenizer.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2011 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef CHARTOKENIZER_H
8 #define CHARTOKENIZER_H
9 
10 #include "Tokenizer.h"
11 
12 namespace Lucene
13 {
/// Tokenizer subclass organised around two per-character hooks declared at the
/// bottom of the class: isTokenChar() and normalize().  isTokenChar() is pure
/// virtual, so this class is abstract and can only be used through a derived
/// class that implements it.
15  class LPPAPI CharTokenizer : public Tokenizer
16  {
17  public:
    /// Constructs the tokenizer from a reader.
    /// NOTE(review): `input` is presumably the character source to be tokenized -
    /// confirm against the implementation file.
18  CharTokenizer(ReaderPtr input);
    /// Virtual destructor, so derived tokenizers are destroyed correctly
    /// through a base-class pointer.
21  virtual ~CharTokenizer();
22 
24 
25  protected:
    // Scanning state.  The header does not show how these are used; the notes
    // below are inferred from the names only - TODO confirm in the .cpp file.
    /// Presumably the running character position within the input.
26  int32_t offset;
    /// Presumably the index of the next unread character in ioBuffer.
27  int32_t bufferIndex;
    /// Presumably the number of valid characters currently held in ioBuffer.
28  int32_t dataLen;
29 
    /// Class-wide constants; only declared here, their values are defined
    /// outside this header.
30  static const int32_t MAX_WORD_LEN;
31  static const int32_t IO_BUFFER_SIZE;
32 
    /// Character buffer.
    /// NOTE(review): presumably filled from the reader in chunks of
    /// IO_BUFFER_SIZE - confirm.
33  CharArray ioBuffer;
36 
37  public:
    /// Token-stream step function; returns a bool.
    /// NOTE(review): presumably true while a token was produced and false at
    /// end of input, following the base-class contract - confirm.
38  virtual bool incrementToken();
    /// End-of-stream hook.
    /// NOTE(review): presumably records the final offset - confirm.
39  virtual void end();
    /// Re-targets the tokenizer at a new reader.
    /// NOTE(review): presumably also clears offset/bufferIndex/dataLen - confirm.
40  virtual void reset(ReaderPtr input);
41 
42  protected:
    /// Per-character predicate that every concrete subclass must implement
    /// (pure virtual).
    /// NOTE(review): presumably returns true when `c` belongs inside a token
    /// and false when it separates tokens - confirm.
46  virtual bool isTokenChar(wchar_t c) = 0;
47 
    /// Per-character mapping hook; overridable, but not pure, so an
    /// implementation is expected to exist outside this header.
    /// NOTE(review): presumably returns `c` unchanged by default and lets
    /// subclasses fold case or similar - confirm.
50  virtual wchar_t normalize(wchar_t c);
51  };
52 }
53 
54 #endif

clucene.sourceforge.net