00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2002 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00019 #if !UCONFIG_NO_BREAK_ITERATION 00020 00021 #include "unicode/brkiter.h" 00022 #include "unicode/udata.h" 00023 #include "unicode/parseerr.h" 00024 00025 struct UTrie; 00026 00027 U_NAMESPACE_BEGIN 00028 00029 struct RBBIDataHeader; 00030 class RuleBasedBreakIteratorTables; 00031 class BreakIterator; 00032 class RBBIDataWrapper; 00033 00034 00035 00050 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00051 00052 protected: 00057 CharacterIterator* fText; 00058 00063 RBBIDataWrapper *fData; 00064 UTrie *fCharMappings; 00065 00069 int32_t fLastBreakTag; 00070 00077 UBool fLastBreakTagValid; 00078 00086 uint32_t fDictionaryCharCount; 00087 00092 static UBool fTrace; 00093 00094 00095 00096 private: 00100 static const char fgClassID; 00101 00102 protected: 00103 //======================================================================= 00104 // constructors 00105 //======================================================================= 00106 00113 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00114 00125 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00126 00127 friend class RBBIRuleBuilder; 00128 friend class BreakIterator; 00129 00130 00131 00132 public: 00133 00138 RuleBasedBreakIterator(); 00139 00146 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00147 00156 RuleBasedBreakIterator( const UnicodeString &rules, 00157 UParseError &parseError, 00158 UErrorCode &status); 00163 virtual ~RuleBasedBreakIterator(); 00164 00172 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00173 00182 virtual UBool operator==(const BreakIterator& that) const; 00183 00191 UBool operator!=(const BreakIterator& that) const; 00192 00202 virtual BreakIterator* clone() const; 00203 00209 virtual int32_t hashCode(void) const; 00210 00216 virtual const UnicodeString& getRules(void) const; 00217 00218 //======================================================================= 00219 // BreakIterator overrides 00220 //======================================================================= 00221 00230 virtual const CharacterIterator& getText(void) const; 00231 00232 00240 virtual void adoptText(CharacterIterator* newText); 00241 00248 virtual void setText(const UnicodeString& newText); 00249 00256 virtual int32_t first(void); 00257 00264 virtual int32_t last(void); 00265 00276 virtual int32_t next(int32_t n); 00277 00283 virtual int32_t next(void); 00284 00290 virtual int32_t previous(void); 00291 00299 virtual int32_t following(int32_t offset); 00300 00308 virtual int32_t preceding(int32_t offset); 00309 00318 virtual UBool isBoundary(int32_t offset); 00319 00325 virtual int32_t current(void) const; 00326 00327 00337 virtual int32_t getRuleStatus() const; 00338 00350 inline virtual UClassID getDynamicClassID(void) const; 00351 00363 inline static UClassID getStaticClassID(void); 00364 00365 /* 00366 * Create a clone (copy) of this break iterator in memory provided 00367 * by the caller. The idea is to increase performance by avoiding 00368 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00369 * Performance gains are minimal, and correct buffer management is 00370 * tricky. Use clone() instead. 00371 * 00372 * @param stackBuffer The pointer to the memory into which the cloned object 00373 * should be placed. If NULL, allocate heap memory 00374 * for the cloned object. 00375 * @param BufferSize The size of the buffer. If zero, return the required 00376 * buffer size, but do not clone the object. If the 00377 * size was too small (but not zero), allocate heap 00378 * storage for the cloned object. 00379 * 00380 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00381 * returned if the the provided buffer was too small, and 00382 * the clone was therefore put on the heap. 00383 * 00384 * @return Pointer to the clone object. This may differ from the stackBuffer 00385 * address if the byte alignment of the stack buffer was not suitable 00386 * or if the stackBuffer was too small to hold the clone. 00387 * @stable ICU 2.0 00388 */ 00389 virtual BreakIterator * createBufferClone(void *stackBuffer, 00390 int32_t &BufferSize, 00391 UErrorCode &status); 00392 00393 00411 virtual const uint8_t *getBinaryRules(uint32_t &length); 00412 00413 00414 protected: 00415 //======================================================================= 00416 // implementation 00417 //======================================================================= 00426 virtual int32_t handleNext(void); 00427 00436 virtual int32_t handlePrevious(void); 00437 00444 virtual void reset(void); 00445 00454 virtual UBool isDictionaryChar(UChar32); 00455 00461 void init(); 00462 00463 }; 00464 00465 00466 00467 00468 //---------------------------------------------------------------------------------- 00469 // 00470 // Inline Functions Definitions ... 00471 // 00472 //---------------------------------------------------------------------------------- 00473 00474 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00475 return !operator==(that); 00476 } 00477 00478 inline UClassID RuleBasedBreakIterator::getDynamicClassID(void) const { 00479 return RuleBasedBreakIterator::getStaticClassID(); 00480 } 00481 00482 inline UClassID RuleBasedBreakIterator::getStaticClassID(void) { 00483 return (UClassID)(&fgClassID); 00484 } 00485 00486 00487 00488 U_NAMESPACE_END 00489 00490 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00491 00492 #endif