00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2002 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 #include "unicode/brkiter.h" 00019 #include "unicode/udata.h" 00020 #include "unicode/parseerr.h" 00021 00022 struct UTrie; 00023 00024 U_NAMESPACE_BEGIN 00025 00026 struct RBBIDataHeader; 00027 class RuleBasedBreakIteratorTables; 00028 class BreakIterator; 00029 class RBBIDataWrapper; 00030 00031 00032 00040 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00041 00042 protected: 00046 CharacterIterator* fText; 00047 00051 RBBIDataWrapper *fData; 00052 UTrie *fCharMappings; 00053 00055 int32_t fLastBreakTag; 00056 00062 UBool fLastBreakTagValid; 00063 00070 uint32_t fDictionaryCharCount; 00071 00072 // 00073 // Debugging flag. Trace operation of state machine when true. 00074 // 00075 static UBool fTrace; 00076 00077 00078 00079 private: 00083 static const char fgClassID; 00084 00085 protected: 00086 //======================================================================= 00087 // constructors 00088 //======================================================================= 00089 00095 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00096 00106 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00107 00108 friend class RBBIRuleBuilder; 00109 friend class BreakIterator; 00110 00111 00112 00113 public: 00114 00119 RuleBasedBreakIterator(); 00120 00126 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00127 00136 RuleBasedBreakIterator( const UnicodeString &rules, 00137 UParseError &parseError, 00138 UErrorCode &status); 00143 virtual ~RuleBasedBreakIterator(); 00144 00152 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00153 00162 virtual UBool operator==(const BreakIterator& that) const; 00163 00171 UBool operator!=(const BreakIterator& that) const; 00172 00182 virtual BreakIterator* clone() const; 00183 00189 virtual int32_t hashCode(void) const; 00190 00196 virtual const UnicodeString& getRules(void) const; 00197 00198 //======================================================================= 00199 // BreakIterator overrides 00200 //======================================================================= 00201 00210 virtual const CharacterIterator& getText(void) const; 00211 00212 00220 virtual void adoptText(CharacterIterator* newText); 00221 00228 virtual void setText(const UnicodeString& newText); 00229 00236 virtual int32_t first(void); 00237 00244 virtual int32_t last(void); 00245 00256 virtual int32_t next(int32_t n); 00257 00263 virtual int32_t next(void); 00264 00270 virtual int32_t previous(void); 00271 00279 virtual int32_t following(int32_t offset); 00280 00288 virtual int32_t preceding(int32_t offset); 00289 00298 virtual UBool isBoundary(int32_t offset); 00299 00305 virtual int32_t current(void) const; 00306 00307 00317 virtual int32_t getRuleStatus() const; 00318 00329 inline virtual UClassID getDynamicClassID(void) const; 00330 00341 inline static UClassID getStaticClassID(void); 00342 00343 /* 00344 * Create a clone (copy) of this break iterator in memory provided 00345 * by the caller. The idea is to increase performance by avoiding 00346 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00347 * Performance gains are minimal, and correct buffer management is 00348 * tricky. Use clone() instead. 00349 * 00350 * @param stackBuffer The pointer to the memory into which the cloned object 00351 * should be placed. If NULL, allocate heap memory 00352 * for the cloned object. 00353 * @param BufferSize The size of the buffer. If zero, return the required 00354 * buffer size, but do not clone the object. If the 00355 * size was too small (but not zero), allocate heap 00356 * storage for the cloned object. 00357 * 00358 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00359 * returned if the the provided buffer was too small, and 00360 * the clone was therefore put on the heap. 00361 * 00362 * @return Pointer to the clone object. This may differ from the stackBuffer 00363 * address if the byte alignment of the stack buffer was not suitable 00364 * or if the stackBuffer was too small to hold the clone. 00365 * @stable 00366 */ 00367 virtual BreakIterator * createBufferClone(void *stackBuffer, 00368 int32_t &BufferSize, 00369 UErrorCode &status); 00370 00371 00389 virtual const uint8_t *getBinaryRules(uint32_t &length); 00390 00391 00392 protected: 00393 //======================================================================= 00394 // implementation 00395 //======================================================================= 00403 virtual int32_t handleNext(void); 00404 00412 virtual int32_t handlePrevious(void); 00413 00419 virtual void reset(void); 00420 00428 virtual UBool isDictionaryChar(UChar32); 00429 00434 void init(); 00435 00436 }; 00437 00438 00439 00440 00441 //---------------------------------------------------------------------------------- 00442 // 00443 // Inline Functions Definitions ... 00444 // 00445 //---------------------------------------------------------------------------------- 00446 00447 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00448 return !operator==(that); 00449 } 00450 00451 inline UClassID RuleBasedBreakIterator::getDynamicClassID(void) const { 00452 return RuleBasedBreakIterator::getStaticClassID(); 00453 } 00454 00455 inline UClassID RuleBasedBreakIterator::getStaticClassID(void) { 00456 return (UClassID)(&fgClassID); 00457 } 00458 00459 00460 00461 U_NAMESPACE_END 00462 00463 #endif