/*
* Copyright © {1996-1999}, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*
* File tblcoll.h
*
* Created by: Helena Shih
*
* Modification History:
*
*  Date        Name        Description
*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
*                          constructor which reads RuleBasedCollator object from
*                          a binary file.  Added writeToFile method which streams
*                          RuleBasedCollator out to a binary file.  The streamIn
*                          and streamOut methods use istream and ostream objects
*                          in binary mode.
*  2/12/97     aliu        Modified to use TableCollationData sub-object to
*                          hold invariant data.
*  2/13/97     aliu        Moved several methods into this class from Collation.
*                          Added a private RuleBasedCollator(Locale&) constructor,
*                          to be used by Collator::createDefault().  General
*                          clean up.
*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
*                          constructor and getDynamicClassID.
*  3/5/97      aliu        Modified constructFromFile() to add parameter
*                          specifying whether or not binary loading is to be
*                          attempted.  This is required for dynamic rule loading.
*  05/07/97    helena      Added memory allocation error detection.
*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
*                          use MergeCollation::getPattern.
*  6/20/97     helena      Java class name change.
*  8/18/97     helena      Added internal API documentation.
*  09/03/97    helena      Added createCollationKeyValues().
*  02/10/98    damiba      Added compare with "length" parameter
*  08/05/98    erm         Synched with 1.2 version of RuleBasedCollator.java
*  04/23/99    stephen     Removed EDecompositionMode, merged with
*                          Normalizer::EMode
*  06/14/99    stephen     Removed kResourceBundleSuffix
*  11/02/99    helena      Collator performance enhancements.  Eliminates the
*                          UnicodeString construction and special case for NO_OP.
*  11/23/99    srl         More performance enhancements. Updates to NormalizerIterator
*                          internal state management.
*  12/15/99    aliu        Update to support Thai collation.  Move NormalizerIterator
*                          to implementation file.
*******************************************************************************
*/

#ifndef TBLCOLL_H
#define TBLCOLL_H

#include "unicode/utypes.h"
#include "unicode/coll.h"
#include "unicode/chariter.h"
#include "unicode/unistr.h"
#include "unicode/sortkey.h"
#include "unicode/normlzr.h"

// Forward declarations: these types appear below only as pointers, references
// or friends, so their full definitions are not required by this header.
class VectorOfPToContractElement;
class VectorOfInt;
class VectorOfPToContractTable;
class VectorOfPToExpandTable;
class MergeCollation;
class CollationElementIterator;
class RuleBasedCollatorStreamer;
class NormalizerIterator; // see tblcoll.cpp
// NOTE(review): Collator is used as a base class below, which requires the
// complete type (presumably supplied by unicode/coll.h); this forward
// declaration therefore looks redundant -- confirm before removing.
class Collator;
class TableCollationData;

// RuleBasedCollator is a Collator subclass whose public constructors all take
// a rule string (UnicodeString).  Apart from the small inline functions, only
// declarations appear in this header; the definitions live elsewhere.
//
// Maintenance note: the class is exported (U_I18N_API) and has virtual
// functions, so reordering virtual declarations or data members changes the
// binary layout seen by already-compiled clients.
class U_I18N_API RuleBasedCollator : public Collator
{
public:
    // constructor/destructor
    // Four public overloads: rules only, rules + strength, rules +
    // decomposition mode, and rules + both.  Each takes the error code by
    // non-const reference as its last parameter.
    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    RuleBasedCollator( const UnicodeString& rules, ECollationStrength collationStrength, UErrorCode& status);
    RuleBasedCollator( const UnicodeString& rules, Normalizer::EMode decompositionMode, UErrorCode& status);
    RuleBasedCollator( const UnicodeString& rules, ECollationStrength collationStrength, Normalizer::EMode decompositionMode, UErrorCode& status);
    virtual ~RuleBasedCollator();

    // Copy construction and copy assignment are declared explicitly.
    RuleBasedCollator(const RuleBasedCollator& other);
    RuleBasedCollator& operator=(const RuleBasedCollator& other);

    // Comparison against any Collator.  operator!= is defined inline at the
    // bottom of this file as the negation of operator==.
    virtual UBool operator==(const Collator& other) const;
    virtual UBool operator!=(const Collator& other) const;

    // Returns a Collator pointer.
    // NOTE(review): presumably a heap-allocated copy owned by the caller --
    // confirm against the implementation.
    virtual Collator* clone(void) const;

    // Two overloads, one per kind of text source (UnicodeString or
    // CharacterIterator).
    // NOTE(review): ownership of the returned pointer is not stated here --
    // confirm against the implementation.
    virtual CollationElementIterator* createCollationElementIterator(const UnicodeString& source) const;
    virtual CollationElementIterator* createCollationElementIterator(const CharacterIterator& source) const;

    // compare() overloads: two UnicodeStrings; two UnicodeStrings plus a
    // "length" argument (see the 02/10/98 history entry; its exact meaning is
    // not visible in this header); and two UChar buffers with explicit lengths.
    virtual EComparisonResult compare( const UnicodeString& source, const UnicodeString& target) const;
    virtual EComparisonResult compare( const UnicodeString& source, const UnicodeString& target, int32_t length) const;
    virtual EComparisonResult compare( const UChar* source, int32_t sourceLength, const UChar* target, int32_t targetLength) const ;

    // getCollationKey() overloads: source as a UnicodeString or as a UChar
    // buffer with length.  Both take a CollationKey by non-const reference and
    // return a CollationKey reference.
    // NOTE(review): presumably the returned reference is `key` itself -- confirm.
    virtual CollationKey& getCollationKey( const UnicodeString& source, CollationKey& key, UErrorCode& status) const;
    virtual CollationKey& getCollationKey(const UChar *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const;

    virtual int32_t hashCode(void) const;

    // Non-virtual accessors specific to this class.
    const UnicodeString& getRules(void) const;
    int32_t getMaxExpansion(int32_t order) const;

    // Class identification.  The static ID is the address of the private
    // static member fgClassID; the dynamic ID simply forwards to the static one.
    virtual UClassID getDynamicClassID(void) const { return RuleBasedCollator::getStaticClassID(); }
    static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }

    // Returns a byte pointer and reports a length through `length`.  Unlike
    // the accessors above, this member is not declared const.
    // NOTE(review): ownership of the returned buffer is not stated here --
    // confirm against the implementation.
    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);

    /*****************************************************************************
     * PRIVATE
     *****************************************************************************/
private:
    // Its address serves as this class's UClassID (see getStaticClassID()).
    static char fgClassID;

    // Streamer used to read/write binary collation data files.
    friend class RuleBasedCollatorStreamer;
    // Used to iterate over collation elements in a character source.
    friend class CollationElementIterator;
    // Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&)
    friend class Collator;
    // TableCollationData ONLY needs access to UNMAPPED
    friend class TableCollationData;

    // Private default constructor: reachable only from members and the
    // friend classes declared above.
    RuleBasedCollator();

    // Private helpers.  Their definitions are not in this header, so the
    // grouping comments below describe signatures only.
    int32_t addExpansion(int32_t anOrder, const UnicodeString &expandChars);
    void build( const UnicodeString& rules, UErrorCode& success);
    void addComposedChars(void);
    void commit(void);
    int32_t increment( Collator::ECollationStrength s, int32_t lastOrder);
    void addOrder( UChar ch, int32_t anOrder, UErrorCode& status);
    void addExpandOrder(const UnicodeString& groupChars, const UnicodeString& expChars, int32_t anOrder, UErrorCode& status);

    // The three-argument addContractOrder overload is defined inline at the
    // bottom of this file; it forwards to the four-argument one with fwd == TRUE.
    void addContractOrder(const UnicodeString& groupChars, int32_t anOrder, UErrorCode& status);
    void addContractOrder(const UnicodeString& groupChars, int32_t anOrder, UBool fwd, UErrorCode& status);

    int32_t getContractOrder(const UnicodeString &groupChars) const;
    // Overloaded on the argument type: a UChar or an int32_t index.
    VectorOfPToContractElement* getContractValues(UChar ch) const;
    VectorOfPToContractElement* getContractValues(int32_t index) const;
    VectorOfInt* getExpandValueList(int32_t order) const;
    int32_t getCharOrder(UChar ch) const;
    static int32_t getEntry( VectorOfPToContractElement* list, const UnicodeString& name, UBool fwd);

    UBool writeToFile(const char* fileName) const; // True on success
    /* UBool prepareForBundle() const;*/
    void addToCache( const UnicodeString& key);

    // Private locale-based constructor; per the 2/13/97 history entry it was
    // added for use by Collator::createDefault() (Collator is a friend).
    RuleBasedCollator( const Locale& desiredLocale, UErrorCode& status);

    // constructFrom* helpers, one per data source named in the method name.
    void constructFromRules( const UnicodeString& rules, UErrorCode& status);
    void constructFromFile( const Locale& locale, const UnicodeString& localeFileName, UBool tryBinaryFile, UErrorCode& status);
    void constructFromFile( const char* fileName, UErrorCode& status);
    void constructFromCache( const UnicodeString& key, UErrorCode& status);
    // NOTE(review): the parameter is named fileName but its type is Locale.
    const char* constructFromBundle(const Locale& fileName, UErrorCode& status);

    //--------------------------------------------------------------------------
    // Internal Static Utility Methods
    // NOTE(review): despite the heading, only createPathName and chopLocale in
    // this group are declared static; the others are const member functions.
    static char* createPathName( const UnicodeString& prefix, const UnicodeString& name, const UnicodeString& suffix);
    // NOTE(review): `status` is taken BY VALUE here, whereas every other
    // UErrorCode parameter declared in this class is a reference.  As declared,
    // an error recorded inside this function cannot reach the caller.  Changing
    // it needs a matching change to the out-of-line definition -- confirm
    // intent in tblcoll.cpp.
    int32_t getStrengthOrder(NormalizerIterator* cursor, UErrorCode status) const;
    VectorOfInt* makeReorderedBuffer(NormalizerIterator* cursor, UChar colFirst, int32_t lastValue, VectorOfInt* lastExpansion) const;
    int32_t strengthOrder(int32_t value) const ;
    int32_t nextContractChar(NormalizerIterator *cursor, UChar ch, UErrorCode& status) const;
    static void chopLocale(UnicodeString& localeName);

    //--------------------------------------------------------------------------
    // Constants
    // Declared here without initializers; their values are defined out of line.
    static const int32_t UNMAPPED;
    static const int32_t CHARINDEX; // need look up in .commit()
    static const int32_t EXPANDCHARINDEX; // Expand index follows
    static const int32_t CONTRACTCHARINDEX; // contract indexes follow
    static const int32_t PRIMARYORDERINCREMENT;
    static const int32_t MAXIGNORABLE;
    static const int32_t SECONDARYORDERINCREMENT;
    static const int32_t TERTIARYORDERINCREMENT;
    static const int32_t PRIMARYORDERMASK;
    static const int32_t SECONDARYORDERMASK;
    static const int32_t TERTIARYORDERMASK;
    static const int32_t SECONDARYRESETMASK;
    static const int32_t IGNORABLEMASK;
    static const int32_t PRIMARYDIFFERENCEONLY;
    static const int32_t SECONDARYDIFFERENCEONLY;
    static const int32_t PRIMARYORDERSHIFT;
    static const int32_t SECONDARYORDERSHIFT;
    static const int32_t SORTKEYOFFSET;
    static const int32_t CONTRACTCHAROVERFLOW;
    static const int16_t FILEID;
    // NOTE(review): the only member of this group that is not const.
    static UnicodeString DEFAULTRULES;
    static const char* kFilenameSuffix;

    //--------------------------------------------------------------------------
    // Data Members
    // Declaration order determines object layout; do not reorder casually.
    UBool isOverIgnore;
    UChar lastChar;
    MergeCollation* mPattern;
    UnicodeString sbuffer;
    UnicodeString tbuffer;
    UnicodeString key;
    NormalizerIterator *cursor1;
    NormalizerIterator *cursor2;
    // NOTE(review): presumably records whether `data` below is owned by this
    // object -- confirm in the implementation.
    UBool dataIsOwned;
    TableCollationData* data;
};

// Inequality is the logical negation of the virtual operator==.
inline UBool RuleBasedCollator::operator!=(const Collator& other) const { return !(*this == other); }

// Convenience overload: forwards to the four-argument version with fwd == TRUE.
inline void RuleBasedCollator::addContractOrder(const UnicodeString &groupChars, int32_t anOrder, UErrorCode &status) { addContractOrder(groupChars, anOrder, TRUE, status); }

#endif // TBLCOLL_H