00001 /* 00002 * Copyright © {1996-1999}, International Business Machines Corporation and others. All Rights Reserved. 00003 ******************************************************************************* 00004 * 00005 * File tblcoll.h 00006 * 00007 * Created by: Helena Shih 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 2/5/97 aliu Added streamIn and streamOut methods. Added 00013 * constructor which reads RuleBasedCollator object from 00014 * a binary file. Added writeToFile method which streams 00015 * RuleBasedCollator out to a binary file. The streamIn 00016 * and streamOut methods use istream and ostream objects 00017 * in binary mode. 00018 * 2/12/97 aliu Modified to use TableCollationData sub-object to 00019 * hold invariant data. 00020 * 2/13/97 aliu Moved several methods into this class from Collation. 00021 * Added a private RuleBasedCollator(Locale&) constructor, 00022 * to be used by Collator::createDefault(). General 00023 * clean up. 00024 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy 00025 * constructor and getDynamicClassID. 00026 * 3/5/97 aliu Modified constructFromFile() to add parameter 00027 * specifying whether or not binary loading is to be 00028 * attempted. This is required for dynamic rule loading. 00029 * 05/07/97 helena Added memory allocation error detection. 00030 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to 00031 * use MergeCollation::getPattern. 00032 * 6/20/97 helena Java class name change. 00033 * 8/18/97 helena Added internal API documentation. 00034 * 09/03/97 helena Added createCollationKeyValues(). 00035 * 02/10/98 damiba Added compare with "length" parameter 00036 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java 00037 * 04/23/99 stephen Removed EDecompositionMode, merged with 00038 * Normalizer::EMode 00039 * 06/14/99 stephen Removed kResourceBundleSuffix 00040 * 11/02/99 helena Collator performance enhancements. Eliminates the 00041 * UnicodeString construction and special case for NO_OP. 00042 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator 00043 * internal state management. 00044 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator 00045 * to implementation file. 00046 ******************************************************************************* 00047 */ 00048 00049 #ifndef TBLCOLL_H 00050 #define TBLCOLL_H 00051 00052 #include "unicode/utypes.h" 00053 #include "unicode/coll.h" 00054 #include "unicode/chariter.h" 00055 #include "unicode/unistr.h" 00056 #include "unicode/sortkey.h" 00057 #include "unicode/normlzr.h" 00058 00059 class VectorOfPToContractElement; 00060 class VectorOfInt; 00061 class VectorOfPToContractTable; 00062 class VectorOfPToExpandTable; 00063 class MergeCollation; 00064 class CollationElementIterator; 00065 class RuleBasedCollatorStreamer; 00066 class NormalizerIterator; // see tblcoll.cpp 00067 class Collator; 00068 class TableCollationData; 00069 00319 class U_I18N_API RuleBasedCollator : public Collator 00320 { 00321 public: 00322 00323 // constructor/destructor 00332 RuleBasedCollator(const UnicodeString& rules, 00333 UErrorCode& status); 00334 00335 RuleBasedCollator( const UnicodeString& rules, 00336 ECollationStrength collationStrength, 00337 UErrorCode& status); 00338 00339 RuleBasedCollator( const UnicodeString& rules, 00340 Normalizer::EMode decompositionMode, 00341 UErrorCode& status); 00342 00343 RuleBasedCollator( const UnicodeString& rules, 00344 ECollationStrength collationStrength, 00345 Normalizer::EMode decompositionMode, 00346 UErrorCode& status); 00347 00351 virtual ~RuleBasedCollator(); 00352 00353 00357 RuleBasedCollator(const RuleBasedCollator& other); 00358 00363 RuleBasedCollator& operator=(const RuleBasedCollator& other); 00364 00369 virtual UBool operator==(const Collator& other) const; 00370 00375 virtual UBool operator!=(const Collator& other) const; 00376 00382 virtual Collator* clone(void) const; 00383 00393 virtual CollationElementIterator* createCollationElementIterator(const UnicodeString& source) const; 00394 00405 virtual CollationElementIterator* createCollationElementIterator(const CharacterIterator& source) const; 00406 00420 virtual EComparisonResult compare( const UnicodeString& source, 00421 const UnicodeString& target) const; 00422 00423 00438 virtual EComparisonResult compare( const UnicodeString& source, 00439 const UnicodeString& target, 00440 int32_t length) const; 00441 00471 virtual EComparisonResult compare( const UChar* source, 00472 int32_t sourceLength, 00473 const UChar* target, 00474 int32_t targetLength) const ; 00475 00487 virtual CollationKey& getCollationKey( const UnicodeString& source, 00488 CollationKey& key, 00489 UErrorCode& status) const; 00490 00502 virtual CollationKey& getCollationKey(const UChar *source, 00503 int32_t sourceLength, 00504 CollationKey& key, 00505 UErrorCode& status) const; 00506 00512 virtual int32_t hashCode(void) const; 00513 00520 const UnicodeString& getRules(void) const; 00521 00533 int32_t getMaxExpansion(int32_t order) const; 00534 00545 virtual UClassID getDynamicClassID(void) const 00546 { return RuleBasedCollator::getStaticClassID(); } 00547 00548 00559 static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; } 00560 00561 00571 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); 00572 00573 /***************************************************************************** 00574 * PRIVATE 00575 *****************************************************************************/ 00576 private: 00577 static char fgClassID; 00578 00579 // Streamer used to read/write binary collation data files. 00580 friend class RuleBasedCollatorStreamer; 00581 00582 // Used to iterate over collation elements in a character source. 00583 friend class CollationElementIterator; 00584 00585 // Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&) 00586 friend class Collator; 00587 00588 // TableCollationData ONLY needs access to UNMAPPED 00589 friend class TableCollationData; 00590 00591 00594 RuleBasedCollator(); 00595 00601 int32_t addExpansion(int32_t anOrder, 00602 const UnicodeString &expandChars); 00608 void build( const UnicodeString& rules, 00609 UErrorCode& success); 00610 00614 void addComposedChars(void); 00615 00619 void commit(void); 00626 int32_t increment( Collator::ECollationStrength s, 00627 int32_t lastOrder); 00634 void addOrder( UChar ch, 00635 int32_t anOrder, 00636 UErrorCode& status); 00644 void addExpandOrder(const UnicodeString& groupChars, 00645 const UnicodeString& expChars, 00646 int32_t anOrder, 00647 UErrorCode& status); 00654 void addContractOrder(const UnicodeString& groupChars, 00655 int32_t anOrder, 00656 UErrorCode& status); 00664 void addContractOrder(const UnicodeString& groupChars, 00665 int32_t anOrder, 00666 UBool fwd, 00667 UErrorCode& status); 00675 int32_t getContractOrder(const UnicodeString &groupChars) const; 00683 VectorOfPToContractElement* 00684 getContractValues(UChar ch) const; 00692 VectorOfPToContractElement* 00693 getContractValues(int32_t index) const; 00701 VectorOfInt* getExpandValueList(int32_t order) const; 00702 00708 int32_t getCharOrder(UChar ch) const; 00709 00716 static int32_t getEntry( VectorOfPToContractElement* list, 00717 const UnicodeString& name, 00718 UBool fwd); 00719 00729 UBool writeToFile(const char* fileName) const; // True on success 00730 /* UBool prepareForBundle() const;*/ 00731 00738 void addToCache( const UnicodeString& key); 00739 00748 RuleBasedCollator( const Locale& desiredLocale, 00749 UErrorCode& status); 00765 void constructFromRules( const UnicodeString& rules, 00766 UErrorCode& status); 00767 void constructFromFile( const Locale& locale, 00768 const UnicodeString& localeFileName, 00769 UBool tryBinaryFile, 00770 UErrorCode& status); 00771 void constructFromFile( const char* fileName, 00772 UErrorCode& status); 00773 void constructFromCache( const UnicodeString& key, 00774 UErrorCode& status); 00775 const char* constructFromBundle(const Locale& fileName, 00776 UErrorCode& status); 00777 00778 00779 //-------------------------------------------------------------------------- 00780 // Internal Static Utility Methods 00788 static char* createPathName( const UnicodeString& prefix, 00789 const UnicodeString& name, 00790 const UnicodeString& suffix); 00791 00792 int32_t getStrengthOrder(NormalizerIterator* cursor, 00793 UErrorCode status) const; 00794 VectorOfInt* makeReorderedBuffer(NormalizerIterator* cursor, 00795 UChar colFirst, 00796 int32_t lastValue, 00797 VectorOfInt* lastExpansion) const; 00798 int32_t strengthOrder(int32_t value) const ; 00799 int32_t nextContractChar(NormalizerIterator *cursor, 00800 UChar ch, 00801 UErrorCode& status) const; 00807 static void chopLocale(UnicodeString& localeName); 00808 00809 //-------------------------------------------------------------------------- 00810 // Constants 00811 00812 static const int32_t UNMAPPED; 00813 static const int32_t CHARINDEX; // need look up in .commit() 00814 static const int32_t EXPANDCHARINDEX; // Expand index follows 00815 static const int32_t CONTRACTCHARINDEX; // contract indexes follow 00816 00817 static const int32_t PRIMARYORDERINCREMENT; 00818 static const int32_t MAXIGNORABLE; 00819 static const int32_t SECONDARYORDERINCREMENT; 00820 static const int32_t TERTIARYORDERINCREMENT; 00821 static const int32_t PRIMARYORDERMASK; 00822 static const int32_t SECONDARYORDERMASK; 00823 static const int32_t TERTIARYORDERMASK; 00824 static const int32_t SECONDARYRESETMASK; 00825 static const int32_t IGNORABLEMASK; 00826 static const int32_t PRIMARYDIFFERENCEONLY; 00827 static const int32_t SECONDARYDIFFERENCEONLY; 00828 static const int32_t PRIMARYORDERSHIFT; 00829 static const int32_t SECONDARYORDERSHIFT; 00830 static const int32_t SORTKEYOFFSET; 00831 static const int32_t CONTRACTCHAROVERFLOW; 00832 00833 static const int16_t FILEID; 00834 00835 static UnicodeString DEFAULTRULES; 00836 00837 static const char* kFilenameSuffix; 00838 00839 //-------------------------------------------------------------------------- 00840 // Data Members 00841 00842 UBool isOverIgnore; 00843 UChar lastChar; 00844 MergeCollation* mPattern; 00845 UnicodeString sbuffer; 00846 UnicodeString tbuffer; 00847 UnicodeString key; 00848 NormalizerIterator *cursor1; 00849 NormalizerIterator *cursor2; 00850 UBool dataIsOwned; 00851 TableCollationData* data; 00852 }; 00853 00854 inline UBool 00855 RuleBasedCollator::operator!=(const Collator& other) const 00856 { 00857 return !(*this == other); 00858 } 00859 00860 inline void 00861 RuleBasedCollator::addContractOrder(const UnicodeString &groupChars, 00862 int32_t anOrder, 00863 UErrorCode &status) 00864 { 00865 addContractOrder(groupChars, anOrder, TRUE, status); 00866 } 00867 00868 00869 00870 00871 #endif