00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011
00012 #include "unicode/utypes.h"
00013 #include "unicode/uobject.h"
00014 #include "unicode/unistr.h"
00015 #include "unicode/chariter.h"
00016 #include "unicode/unorm.h"
00017
00018 struct UCharIterator;
00019 typedef struct UCharIterator UCharIterator;
00021 U_NAMESPACE_BEGIN
00112 class U_COMMON_API Normalizer : public UObject {
00113 public:
00119 enum {
00120 DONE=0xffff
00121 };
00122
00123
00124
00135 Normalizer(const UnicodeString& str, UNormalizationMode mode);
00136
00148 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00149
00160 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00161
00167 Normalizer(const Normalizer& copy);
00168
00173 ~Normalizer();
00174
00175
00176
00177
00178
00179
00197 static void normalize(const UnicodeString& source,
00198 UNormalizationMode mode, int32_t options,
00199 UnicodeString& result,
00200 UErrorCode &status);
00201
00223 static void compose(const UnicodeString& source,
00224 UBool compat, int32_t options,
00225 UnicodeString& result,
00226 UErrorCode &status);
00227
00250 static void decompose(const UnicodeString& source,
00251 UBool compat, int32_t options,
00252 UnicodeString& result,
00253 UErrorCode &status);
00254
00275 static inline UNormalizationCheckResult
00276 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00277
00298 static inline UBool
00299 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00300
00330 static UnicodeString &
00331 concatenate(UnicodeString &left, UnicodeString &right,
00332 UnicodeString &result,
00333 UNormalizationMode mode, int32_t options,
00334 UErrorCode &errorCode);
00335
00398 static inline int32_t
00399 compare(const UnicodeString &s1, const UnicodeString &s2,
00400 uint32_t options,
00401 UErrorCode &errorCode);
00402
00403
00404
00405
00406
00415 UChar32 current(void);
00416
00425 UChar32 first(void);
00426
00435 UChar32 last(void);
00436
00445 UChar32 next(void);
00446
00455 UChar32 previous(void);
00456
00457 #ifdef ICU_NORMALIZER_USE_DEPRECATES
00458
00477 UChar32 setIndex(int32_t index);
00478 #endif
00479
00489 void setIndexOnly(int32_t index);
00490
00496 void reset(void);
00497
00512 int32_t getIndex(void) const;
00513
00522 int32_t startIndex(void) const;
00523
00534 int32_t endIndex(void) const;
00535
00544 UBool operator==(const Normalizer& that) const;
00545
00554 inline UBool operator!=(const Normalizer& that) const;
00555
00562 Normalizer* clone(void) const;
00563
00570 int32_t hashCode(void) const;
00571
00572
00573
00574
00575
00591 void setMode(UNormalizationMode newMode);
00592
00603 UNormalizationMode getUMode(void) const;
00604
00621 void setOption(int32_t option,
00622 UBool value);
00623
00634 UBool getOption(int32_t option) const;
00635
00644 void setText(const UnicodeString& newText,
00645 UErrorCode &status);
00646
00655 void setText(const CharacterIterator& newText,
00656 UErrorCode &status);
00657
00667 void setText(const UChar* newText,
00668 int32_t length,
00669 UErrorCode &status);
00676 void getText(UnicodeString& result);
00677
00683 virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00684
00690 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00691
00692
00693
00694
00695
00696 #ifdef ICU_NORMALIZER_USE_DEPRECATES
00697
00698 enum {
00717 IGNORE_HANGUL = 0x001
00718 };
00719
00724 enum {
00725 COMPAT_BIT = 1,
00726 DECOMP_BIT = 2,
00727 COMPOSE_BIT = 4,
00728 FCD_BIT = 8
00729 };
00730
00735 enum EMode {
00749 NO_OP = 0,
00750
00766 COMPOSE = COMPOSE_BIT,
00767
00783 COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT,
00784
00800 DECOMP = DECOMP_BIT,
00801
00817 DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT,
00818
00822 FCD = FCD_BIT
00823 };
00824
00835 Normalizer(const UnicodeString& str,
00836 EMode mode);
00837
00856 Normalizer(const UnicodeString& str,
00857 EMode mode,
00858 int32_t opt);
00859
00871 Normalizer(const UChar* str,
00872 int32_t length,
00873 EMode mode);
00874
00890 Normalizer(const UChar* str,
00891 int32_t length,
00892 EMode mode,
00893 int32_t option);
00894
00905 Normalizer(const CharacterIterator& iter,
00906 EMode mode);
00907
00923 Normalizer(const CharacterIterator& iter,
00924 EMode mode,
00925 int32_t opt);
00926
00947 inline static void
00948 normalize(const UnicodeString& source,
00949 EMode mode,
00950 int32_t options,
00951 UnicodeString& result,
00952 UErrorCode &status);
00953
00970 inline static UNormalizationCheckResult
00971 quickCheck(const UnicodeString& source,
00972 EMode mode,
00973 UErrorCode& status);
00974
00982 inline static UNormalizationMode getUNormalizationMode(EMode mode,
00983 UErrorCode& status);
00984
00992 inline static EMode getNormalizerEMode(UNormalizationMode mode,
00993 UErrorCode& status);
00994
01021 inline void setMode(EMode newMode);
01022
01029 inline EMode getMode(void) const;
01030 #endif
01031
01032 private:
01033
01034
01035
01036
01037
01038
01039 UBool nextNormalize();
01040 UBool previousNormalize();
01041
01042 void init(CharacterIterator *iter);
01043 void clearBuffer(void);
01044
01045 #ifdef ICU_NORMALIZER_USE_DEPRECATES
01046
01047
01048 inline static UNormalizationMode getUMode(EMode mode);
01049 #endif
01050
01051
01052
01053
01054
01055 UNormalizationMode fUMode;
01056 int32_t fOptions;
01057
01058
01059 UCharIterator *text;
01060
01061
01062
01063 int32_t currentIndex, nextIndex;
01064
01065
01066 UnicodeString buffer;
01067 int32_t bufferPos;
01068
01073 static const char fgClassID;
01074 };
01075
01076
01077
01078
01079
01080 inline UBool
01081 Normalizer::operator!= (const Normalizer& other) const
01082 { return ! operator==(other); }
01083
01084 inline UNormalizationCheckResult
01085 Normalizer::quickCheck(const UnicodeString& source,
01086 UNormalizationMode mode,
01087 UErrorCode &status) {
01088 if(U_FAILURE(status)) {
01089 return UNORM_MAYBE;
01090 }
01091
01092 return unorm_quickCheck(source.getBuffer(), source.length(),
01093 mode, &status);
01094 }
01095
01096 inline UBool
01097 Normalizer::isNormalized(const UnicodeString& source,
01098 UNormalizationMode mode,
01099 UErrorCode &status) {
01100 if(U_FAILURE(status)) {
01101 return FALSE;
01102 }
01103
01104 return unorm_isNormalized(source.getBuffer(), source.length(),
01105 mode, &status);
01106 }
01107
01108 inline int32_t
01109 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
01110 uint32_t options,
01111 UErrorCode &errorCode) {
01112
01113 return unorm_compare(s1.getBuffer(), s1.length(),
01114 s2.getBuffer(), s2.length(),
01115 options,
01116 &errorCode);
01117 }
01118
01119 #ifdef ICU_NORMALIZER_USE_DEPRECATES
01120 inline void
01121 Normalizer::normalize(const UnicodeString& source,
01122 EMode mode, int32_t options,
01123 UnicodeString& result,
01124 UErrorCode &status) {
01125 normalize(source, getUNormalizationMode(mode, status), options, result, status);
01126 }
01127
01128 inline UNormalizationCheckResult
01129 Normalizer::quickCheck(const UnicodeString& source,
01130 EMode mode,
01131 UErrorCode &status) {
01132 return quickCheck(source, getUNormalizationMode(mode, status), status);
01133 }
01134
01135 inline void
01136 Normalizer::setMode(EMode newMode) {
01137 UErrorCode status = U_ZERO_ERROR;
01138 fUMode = getUNormalizationMode(newMode, status);
01139 }
01140
01141 inline Normalizer::EMode
01142 Normalizer::getMode() const {
01143 UErrorCode status = U_ZERO_ERROR;
01144 return getNormalizerEMode(fUMode, status);
01145 }
01146
01147 inline UNormalizationMode Normalizer::getUNormalizationMode(
01148 Normalizer::EMode mode, UErrorCode &status)
01149 {
01150 if (U_SUCCESS(status))
01151 {
01152 switch (mode)
01153 {
01154 case Normalizer::NO_OP :
01155 return UNORM_NONE;
01156 case Normalizer::COMPOSE :
01157 return UNORM_NFC;
01158 case Normalizer::COMPOSE_COMPAT :
01159 return UNORM_NFKC;
01160 case Normalizer::DECOMP :
01161 return UNORM_NFD;
01162 case Normalizer::DECOMP_COMPAT :
01163 return UNORM_NFKD;
01164 case Normalizer::FCD:
01165 return UNORM_FCD;
01166 default :
01167 status = U_ILLEGAL_ARGUMENT_ERROR;
01168 }
01169 }
01170 return UNORM_DEFAULT;
01171 }
01172
01173 inline UNormalizationMode
01174 Normalizer::getUMode(Normalizer::EMode mode) {
01175 switch(mode) {
01176 case Normalizer::NO_OP :
01177 return UNORM_NONE;
01178 case Normalizer::COMPOSE :
01179 return UNORM_NFC;
01180 case Normalizer::COMPOSE_COMPAT :
01181 return UNORM_NFKC;
01182 case Normalizer::DECOMP :
01183 return UNORM_NFD;
01184 case Normalizer::DECOMP_COMPAT :
01185 return UNORM_NFKD;
01186 case Normalizer::FCD:
01187 return UNORM_FCD;
01188 default :
01189 return UNORM_DEFAULT;
01190 }
01191 }
01192
01193 inline Normalizer::EMode Normalizer::getNormalizerEMode(
01194 UNormalizationMode mode, UErrorCode &status)
01195 {
01196 if (U_SUCCESS(status))
01197 {
01198 switch (mode)
01199 {
01200 case UNORM_NONE :
01201 return Normalizer::NO_OP;
01202 case UNORM_NFD :
01203 return Normalizer::DECOMP;
01204 case UNORM_NFKD :
01205 return Normalizer::DECOMP_COMPAT;
01206 case UNORM_NFC :
01207 return Normalizer::COMPOSE;
01208 case UNORM_NFKC :
01209 return Normalizer::COMPOSE_COMPAT;
01210 case UNORM_FCD:
01211 return Normalizer::FCD;
01212 default :
01213 status = U_ILLEGAL_ARGUMENT_ERROR;
01214 }
01215 }
01216 return Normalizer::DECOMP_COMPAT;
01217 }
01218 #endif
01219
01220 U_NAMESPACE_END
#endif // NORMLZR_H