Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

normlzr.h

Go to the documentation of this file.
00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-2001, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 #include "unicode/uobject.h"
00014 #include "unicode/unistr.h"
00015 #include "unicode/chariter.h"
00016 #include "unicode/unorm.h"
00017 
// Forward declaration (plus C-style typedef) of UCharIterator. Only the name
// is needed in this header: the Normalizer class below stores a
// UCharIterator pointer (member `text`) and never accesses its fields here.
00018 struct UCharIterator;
00019 typedef struct UCharIterator UCharIterator; 
00021 U_NAMESPACE_BEGIN
/**
 * Normalizer (derives from UObject, exported via U_COMMON_API).
 *
 * Declares three groups of public members:
 *  - static utility methods (normalize, compose, decompose, quickCheck,
 *    isNormalized, concatenate, compare) that work on whole UnicodeStrings;
 *  - an iteration API (current/first/last/next/previous plus index accessors)
 *    that works on the text held by an instance;
 *  - property accessors for the mode, the options and the text.
 * A block of obsolete APIs based on the EMode enum is compiled only when
 * ICU_NORMALIZER_USE_DEPRECATES is defined.
 *
 * Only the members marked inline are defined in this header; everything else
 * is declared here and defined elsewhere.
 * NOTE(review): behavior descriptions introduced with "presumably" are
 * inferred from names/signatures only — confirm against the implementation.
 */
00112 class U_COMMON_API Normalizer : public UObject {
00113 public:
        /**
         * Unnamed enum providing the constant DONE (0xffff).
         * NOTE(review): presumably the value the iteration methods return
         * when there is no character to return — confirm.
         */
00119   enum {
00120       DONE=0xffff
00121   };
00122 
00123   // Constructors
00124 
        /** Constructor taking the text as a UnicodeString and a UNormalizationMode. */
00135   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00136     
        /**
         * Constructor taking the text as a UChar pointer with an explicit
         * length, and a UNormalizationMode.
         * NOTE(review): whether a negative length (NUL-terminated input) is
         * accepted is not visible here — confirm.
         */
00148   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00149 
        /** Constructor taking the text as a CharacterIterator and a UNormalizationMode. */
00160   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00161 
        /** Copy constructor. */
00167   Normalizer(const Normalizer& copy);
00168 
        /** Destructor. */
00173   ~Normalizer();
00174 
00175 
00176   //-------------------------------------------------------------------------
00177   // Static utility methods
00178   //-------------------------------------------------------------------------
00179 
        /**
         * Static utility taking a source string, a UNormalizationMode and an
         * options word; `result` and `status` are passed by non-const
         * reference.
         * NOTE(review): presumably writes the normalized form of `source`
         * into `result` and reports errors through `status` — confirm.
         */
00197   static void normalize(const UnicodeString& source,
00198                         UNormalizationMode mode, int32_t options,
00199                         UnicodeString& result,
00200                         UErrorCode &status);
00201 
        /**
         * Static utility with the same shape as normalize(), except that a
         * UBool `compat` flag replaces the mode.
         * NOTE(review): presumably composes `source` into `result`, with
         * `compat` selecting the compatibility variant — confirm.
         */
00223   static void compose(const UnicodeString& source,
00224                       UBool compat, int32_t options,
00225                       UnicodeString& result,
00226                       UErrorCode &status);
00227 
        /**
         * Static utility with the same parameter list as compose().
         * NOTE(review): presumably decomposes `source` into `result`, with
         * `compat` selecting the compatibility variant — confirm.
         */
00250   static void decompose(const UnicodeString& source,
00251                         UBool compat, int32_t options,
00252                         UnicodeString& result,
00253                         UErrorCode &status);
00254 
        /**
         * Inline; defined after the class in this header. Returns UNORM_MAYBE
         * when `status` already indicates failure, otherwise the result of
         * unorm_quickCheck() on the string's buffer and length.
         */
00275   static inline UNormalizationCheckResult
00276   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00277 
        /**
         * Inline; defined after the class in this header (where the
         * parameters are named source/status rather than src/errorCode).
         * Returns FALSE when the error code already indicates failure,
         * otherwise the result of unorm_isNormalized().
         */
00298   static inline UBool
00299   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00300 
        /**
         * Static utility taking two strings (both by non-const reference), a
         * destination string, a mode and an options word; returns a
         * UnicodeString reference.
         * NOTE(review): presumably concatenates left and right into `result`
         * keeping the given normalization form, and returns `result` —
         * confirm. Why left/right are non-const is not visible here.
         */
00330   static UnicodeString &
00331   concatenate(UnicodeString &left, UnicodeString &right,
00332               UnicodeString &result,
00333               UNormalizationMode mode, int32_t options,
00334               UErrorCode &errorCode);
00335 
        /**
         * Inline; defined after the class in this header as a direct call to
         * unorm_compare() with both strings' buffers and lengths. The meaning
         * of `options` and of the returned int32_t is that of unorm_compare().
         */
00398   static inline int32_t
00399   compare(const UnicodeString &s1, const UnicodeString &s2,
00400           uint32_t options,
00401           UErrorCode &errorCode);
00402 
00403   //-------------------------------------------------------------------------
00404   // Iteration API
00405   //-------------------------------------------------------------------------
00406   
        /**
         * The five methods below each return a UChar32 and take no arguments.
         * NOTE(review): presumably they return the current / first / last /
         * next / previous code point of the normalized text — confirm.
         */
00415   UChar32              current(void);
00416 
00425   UChar32              first(void);
00426 
00435   UChar32              last(void);
00436 
00445   UChar32              next(void);
00446 
00455   UChar32              previous(void);
00456 
00457 #ifdef ICU_NORMALIZER_USE_DEPRECATES
00458 
        /**
         * Only declared when ICU_NORMALIZER_USE_DEPRECATES is defined.
         * Takes an index and returns a UChar32, unlike setIndexOnly() below,
         * which returns void.
         */
00477   UChar32              setIndex(int32_t index);
00478 #endif
00479 
        /**
         * Takes an index and returns nothing.
         * NOTE(review): presumably repositions the iteration in the input
         * text without producing a character — confirm.
         */
00489   void                 setIndexOnly(int32_t index);
00490 
        /** NOTE(review): presumably resets the iteration to its start — confirm. */
00496   void                reset(void);
00497 
        /**
         * Const accessor returning an int32_t index.
         * NOTE(review): presumably an index into the input text — confirm.
         */
00512   int32_t            getIndex(void) const;
00513 
        /** Const accessor; NOTE(review): presumably the input text's start index — confirm. */
00522   int32_t            startIndex(void) const;
00523 
        /** Const accessor; NOTE(review): presumably the input text's end index — confirm. */
00534   int32_t            endIndex(void) const;
00535 
        /** Equality comparison with another Normalizer; defined elsewhere. */
00544   UBool        operator==(const Normalizer& that) const;
00545 
        /** Inline; defined after the class as the negation of operator==. */
00554   inline UBool        operator!=(const Normalizer& that) const;
00555 
        /**
         * Returns a pointer to a Normalizer.
         * NOTE(review): presumably a newly allocated copy that the caller
         * owns — confirm.
         */
00562   Normalizer*        clone(void) const;
00563 
        /** Returns an int32_t hash code for this object; defined elsewhere. */
00570   int32_t                hashCode(void) const;
00571 
00572   //-------------------------------------------------------------------------
00573   // Property access methods
00574   //-------------------------------------------------------------------------
00575 
        /** Setter taking a UNormalizationMode (the type of member fUMode). */
00591   void setMode(UNormalizationMode newMode);
00592 
        /** Const getter returning a UNormalizationMode (the type of member fUMode). */
00603   UNormalizationMode getUMode(void) const;
00604 
        /**
         * Takes an option identifier and a boolean value.
         * NOTE(review): presumably sets or clears that option in fOptions —
         * confirm.
         */
00621   void setOption(int32_t option, 
00622          UBool value);
00623 
        /** Const query returning a UBool for the given option identifier. */
00634   UBool getOption(int32_t option) const;
00635 
        /**
         * Three setText overloads, mirroring the three text-source
         * constructors (UnicodeString, CharacterIterator, UChar*+length).
         * Each takes a UErrorCode by reference.
         * NOTE(review): presumably each replaces the input text and reports
         * failure through `status` — confirm.
         */
00644   void setText(const UnicodeString& newText, 
00645            UErrorCode &status);
00646 
00655   void setText(const CharacterIterator& newText, 
00656            UErrorCode &status);
00657 
00667   void setText(const UChar* newText,
00668                     int32_t length,
00669             UErrorCode &status);
        /**
         * Takes a UnicodeString by non-const reference and returns nothing.
         * NOTE(review): presumably copies the input text into `result` —
         * confirm.
         */
00676   void            getText(UnicodeString&  result);
00677 
        /** Virtual; returns the same value as getStaticClassID(). */
00683   virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00684 
        /** Returns the address of the static member fgClassID, cast to UClassID. */
00690   static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00691 
00692   //-------------------------------------------------------------------------
00693   // Obsolete APIs
00694   //-------------------------------------------------------------------------
00695 
00696 #ifdef ICU_NORMALIZER_USE_DEPRECATES
00697 
        /**
         * Option constant IGNORE_HANGUL (0x001).
         * NOTE(review): presumably meant for the `opt`/`options` parameters
         * of the obsolete APIs below — confirm.
         */
00698   enum {
00717     IGNORE_HANGUL     = 0x001
00718   };
00719 
        /** Single-bit flags from which the EMode values below are built. */
00724   enum {
00725     COMPAT_BIT         = 1,
00726     DECOMP_BIT         = 2,
00727     COMPOSE_BIT        = 4,
00728     FCD_BIT            = 8
00729   };
00730 
        /**
         * Obsolete mode enum. Numeric values follow from the bit flags above:
         * NO_OP=0, DECOMP=2, DECOMP_COMPAT=3, COMPOSE=4, COMPOSE_COMPAT=5,
         * FCD=8. getUNormalizationMode() and getNormalizerEMode(), defined
         * after the class, convert between EMode and UNormalizationMode.
         */
00735   enum EMode {
00749     NO_OP         = 0,
00750     
00766     COMPOSE         = COMPOSE_BIT,
00767 
00783     COMPOSE_COMPAT     = COMPOSE_BIT | COMPAT_BIT,
00784 
00800     DECOMP         = DECOMP_BIT,
00801 
00817     DECOMP_COMPAT     = DECOMP_BIT | COMPAT_BIT,
00818 
00822     FCD = FCD_BIT
00823   };
00824 
        /**
         * Obsolete constructors: for each of the three text sources
         * (UnicodeString, UChar*+length, CharacterIterator) there is one
         * overload taking only an EMode and one taking an EMode plus an
         * int32_t option word.
         */
00835   Normalizer(const UnicodeString& str, 
00836          EMode mode);
00837     
00856   Normalizer(const UnicodeString& str, 
00857          EMode mode, 
00858          int32_t opt);
00859 
00871   Normalizer(const UChar* str,
00872          int32_t length,
00873          EMode mode);
00874 
00890   Normalizer(const UChar* str,
00891          int32_t length,
00892          EMode mode,
00893          int32_t option);
00894 
00905   Normalizer(const CharacterIterator& iter, 
00906          EMode mode);
00907 
00923   Normalizer(const CharacterIterator& iter, 
00924          EMode mode, 
00925          int32_t opt);
00926 
        /**
         * Obsolete EMode overload; inline, defined after the class. Converts
         * `mode` with getUNormalizationMode() and calls the
         * UNormalizationMode overload of normalize().
         */
00947   inline static void
00948   normalize(const UnicodeString& source, 
00949             EMode mode, 
00950             int32_t options,
00951             UnicodeString& result, 
00952             UErrorCode &status);
00953 
        /**
         * Obsolete EMode overload; inline, defined after the class. Converts
         * `mode` with getUNormalizationMode() and calls the
         * UNormalizationMode overload of quickCheck().
         */
00970   inline static UNormalizationCheckResult
00971   quickCheck(const UnicodeString& source,
00972              EMode                mode, 
00973              UErrorCode&          status);
00974 
        /**
         * Inline; defined after the class. Maps an EMode to the matching
         * UNormalizationMode; an unrecognized mode sets `status` to
         * U_ILLEGAL_ARGUMENT_ERROR and yields UNORM_DEFAULT.
         */
00982   inline static UNormalizationMode getUNormalizationMode(EMode mode, 
00983                                                   UErrorCode& status);
00984 
        /**
         * Inline; defined after the class. Maps a UNormalizationMode to the
         * matching EMode; an unrecognized mode sets `status` to
         * U_ILLEGAL_ARGUMENT_ERROR and yields DECOMP_COMPAT.
         */
00992   inline static EMode getNormalizerEMode(UNormalizationMode mode, 
00993                                          UErrorCode& status);
00994 
        /**
         * Obsolete EMode setter; inline, defined after the class. Stores the
         * converted mode in fUMode; conversion errors are not reported.
         */
01021   inline void setMode(EMode newMode);
01022 
        /**
         * Obsolete EMode getter; inline, defined after the class. Returns
         * fUMode converted to EMode; conversion errors are not reported.
         */
01029   inline EMode getMode(void) const;
01030 #endif /* ICU_NORMALIZER_USE_DEPRECATES */
01031 
01032 private:
01033   //-------------------------------------------------------------------------
01034   // Private functions
01035   //-------------------------------------------------------------------------
01036 
01037   // Private utility methods for iteration
01038   // For documentation, see the source code
01039   UBool nextNormalize();
01040   UBool previousNormalize();
01041 
        // init() takes a CharacterIterator pointer; clearBuffer() takes nothing.
        // NOTE(review): presumably init() sets up `text` from the iterator and
        // clearBuffer() empties `buffer`/`bufferPos` — confirm in the source.
01042   void    init(CharacterIterator *iter);
01043   void    clearBuffer(void);
01044 
01045 #ifdef ICU_NORMALIZER_USE_DEPRECATES
01046   // Helper, without UErrorCode, for easier transitional code
01047   // remove after 2002-sep-30 with EMode etc.
01048   inline static UNormalizationMode getUMode(EMode mode);
01049 #endif  /* ICU_NORMALIZER_USE_DEPRECATES */
01050 
01051   //-------------------------------------------------------------------------
01052   // Private data
01053   //-------------------------------------------------------------------------
01054 
        // Current normalization mode (written by the inline setMode(EMode),
        // read by the inline getMode()) and the option word.
01055   UNormalizationMode  fUMode;
01056   int32_t             fOptions;
01057 
01058   // The input text and our position in it
01059   UCharIterator       *text;
01060 
01061   // The normalization buffer is the result of normalization
01062   // of the source in [currentIndex..nextIndex[ .
01063   int32_t         currentIndex, nextIndex;
01064 
01065   // A buffer for holding intermediate results
01066   UnicodeString       buffer;
01067   int32_t         bufferPos;
01068 
        // Static byte whose address getStaticClassID() returns as the class ID;
        // only the address is used in this header.
01073   static const char fgClassID;
01074 };
01075 
01076 //-------------------------------------------------------------------------
01077 // Inline implementations
01078 //-------------------------------------------------------------------------
01079 
// Inequality operator: returns the logical negation of operator==(other),
// so the two operators cannot disagree.
01080 inline UBool
01081 Normalizer::operator!= (const Normalizer& other) const
01082 { return ! operator==(other); }
01083 
// Quick check of `source` against normalization mode `mode`.
//
// @param source  string whose buffer and length are handed to unorm_quickCheck()
// @param mode    normalization mode, passed through unchanged
// @param status  in/out error code; if it already indicates failure on entry,
//                nothing is called and UNORM_MAYBE is returned
// @return        UNORM_MAYBE on incoming failure, otherwise whatever
//                unorm_quickCheck() returns
01084 inline UNormalizationCheckResult
01085 Normalizer::quickCheck(const UnicodeString& source,
01086                        UNormalizationMode mode, 
01087                        UErrorCode &status) {
      // Respect a failure set by an earlier call: do no work.
01088     if(U_FAILURE(status)) {
01089         return UNORM_MAYBE;
01090     }
01091 
      // Thin wrapper: delegate to the C API with the raw buffer and length.
01092     return unorm_quickCheck(source.getBuffer(), source.length(),
01093                             mode, &status);
01094 }
01095 
// Tests whether `source` is normalized according to `mode`.
//
// @param source  string whose buffer and length are handed to unorm_isNormalized()
// @param mode    normalization mode, passed through unchanged
// @param status  in/out error code; if it already indicates failure on entry,
//                nothing is called and FALSE is returned
// @return        FALSE on incoming failure, otherwise whatever
//                unorm_isNormalized() returns
01096 inline UBool
01097 Normalizer::isNormalized(const UnicodeString& source,
01098                          UNormalizationMode mode, 
01099                          UErrorCode &status) {
      // Respect a failure set by an earlier call: do no work.
01100     if(U_FAILURE(status)) {
01101         return FALSE;
01102     }
01103 
      // Thin wrapper: delegate to the C API with the raw buffer and length.
01104     return unorm_isNormalized(source.getBuffer(), source.length(),
01105                               mode, &status);
01106 }
01107 
// Compares two strings by forwarding both buffers and lengths, the options
// word and the error code to unorm_compare().
//
// Unlike quickCheck()/isNormalized() above, there is no U_FAILURE pre-check
// here; validation of the arguments (including errorCode) is left to
// unorm_compare().
//
// @return  the int32_t result of unorm_compare()
01108 inline int32_t
01109 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
01110                     uint32_t options,
01111                     UErrorCode &errorCode) {
01112   // all argument checking is done in unorm_compare
01113   return unorm_compare(s1.getBuffer(), s1.length(),
01114                        s2.getBuffer(), s2.length(),
01115                        options,
01116                        &errorCode);
01117 }
01118 
01119 #ifdef ICU_NORMALIZER_USE_DEPRECATES
// Obsolete EMode overload of normalize(). Converts `mode` to a
// UNormalizationMode with getUNormalizationMode() and forwards everything to
// the UNormalizationMode overload.
//
// If `mode` is not a recognized EMode, getUNormalizationMode() sets `status`
// to U_ILLEGAL_ARGUMENT_ERROR and returns UNORM_DEFAULT; the forwarded call
// then receives that failing status.
01120 inline void 
01121 Normalizer::normalize(const UnicodeString& source, 
01122                       EMode mode, int32_t options,
01123                       UnicodeString& result, 
01124                       UErrorCode &status) {
01125   normalize(source, getUNormalizationMode(mode, status), options, result, status);
01126 }
01127 
// Obsolete EMode overload of quickCheck(). Converts `mode` with
// getUNormalizationMode() and forwards to the UNormalizationMode overload.
//
// If the conversion fails, `status` is set to U_ILLEGAL_ARGUMENT_ERROR before
// the forwarded call, which then returns UNORM_MAYBE because of its
// U_FAILURE(status) guard.
01128 inline UNormalizationCheckResult
01129 Normalizer::quickCheck(const UnicodeString& source,
01130                        EMode mode, 
01131                        UErrorCode &status) {
01132   return quickCheck(source, getUNormalizationMode(mode, status), status);
01133 }
01134 
// Obsolete EMode setter: stores the UNormalizationMode equivalent of
// `newMode` in fUMode.
//
// The conversion's error code lives in a local variable and is discarded, so
// an unrecognized `newMode` silently leaves fUMode == UNORM_DEFAULT (the
// fallback value returned by getUNormalizationMode()).
01135 inline void
01136 Normalizer::setMode(EMode newMode) {
01137   UErrorCode status = U_ZERO_ERROR;
01138   fUMode = getUNormalizationMode(newMode, status);
01139 }
01140 
// Obsolete EMode getter: returns fUMode converted to EMode.
//
// The conversion's error code lives in a local variable and is discarded, so
// if fUMode has no EMode equivalent the caller silently receives
// DECOMP_COMPAT (the fallback value returned by getNormalizerEMode()).
01141 inline Normalizer::EMode
01142 Normalizer::getMode() const {
01143   UErrorCode status = U_ZERO_ERROR;
01144   return getNormalizerEMode(fUMode, status);
01145 }
01146 
// Converts an obsolete EMode value to the corresponding UNormalizationMode.
//
// Mapping: NO_OP -> UNORM_NONE, COMPOSE -> UNORM_NFC,
//          COMPOSE_COMPAT -> UNORM_NFKC, DECOMP -> UNORM_NFD,
//          DECOMP_COMPAT -> UNORM_NFKD, FCD -> UNORM_FCD.
//
// @param mode    EMode value to convert
// @param status  in/out error code. If it already indicates failure on entry
//                it is left untouched and no mapping is attempted; if `mode`
//                is not one of the six values above it is set to
//                U_ILLEGAL_ARGUMENT_ERROR.
// @return        the mapped mode, or UNORM_DEFAULT in either failure case
01147 inline UNormalizationMode Normalizer::getUNormalizationMode(
01148                                    Normalizer::EMode  mode, UErrorCode &status)
01149 {
01150   if (U_SUCCESS(status))
01151   { 
01152     switch (mode)
01153     {
01154     case Normalizer::NO_OP : 
01155       return UNORM_NONE;
01156     case Normalizer::COMPOSE :
01157       return UNORM_NFC;
01158     case Normalizer::COMPOSE_COMPAT :
01159       return UNORM_NFKC;
01160     case Normalizer::DECOMP :
01161       return UNORM_NFD;
01162     case Normalizer::DECOMP_COMPAT :
01163       return UNORM_NFKD;
01164     case Normalizer::FCD:
01165       return UNORM_FCD;
01166     default : 
      // Unrecognized value: flag it, then fall out to the shared fallback return.
01167       status = U_ILLEGAL_ARGUMENT_ERROR; 
01168     }
01169   }
      // Reached on incoming failure or on an unrecognized mode.
01170   return UNORM_DEFAULT;
01171 }
01172 
// Private helper: the same EMode -> UNormalizationMode mapping as
// getUNormalizationMode(), but without a UErrorCode parameter.
//
// @param mode  EMode value to convert
// @return      the mapped mode; any value outside the six known EMode
//              constants yields UNORM_DEFAULT, with no error reported
01173 inline UNormalizationMode
01174 Normalizer::getUMode(Normalizer::EMode mode) {
01175   switch(mode) {
01176   case Normalizer::NO_OP : 
01177     return UNORM_NONE;
01178   case Normalizer::COMPOSE :
01179     return UNORM_NFC;
01180   case Normalizer::COMPOSE_COMPAT :
01181     return UNORM_NFKC;
01182   case Normalizer::DECOMP :
01183     return UNORM_NFD;
01184   case Normalizer::DECOMP_COMPAT :
01185     return UNORM_NFKD;
01186   case Normalizer::FCD:
01187     return UNORM_FCD;
01188   default : 
01189     return UNORM_DEFAULT;
01190   }
01191 }
01192 
// Converts a UNormalizationMode to the corresponding obsolete EMode value
// (the inverse of getUNormalizationMode()).
//
// Mapping: UNORM_NONE -> NO_OP, UNORM_NFD -> DECOMP,
//          UNORM_NFKD -> DECOMP_COMPAT, UNORM_NFC -> COMPOSE,
//          UNORM_NFKC -> COMPOSE_COMPAT, UNORM_FCD -> FCD.
//
// @param mode    UNormalizationMode value to convert
// @param status  in/out error code. If it already indicates failure on entry
//                it is left untouched and no mapping is attempted; if `mode`
//                is not one of the six values above it is set to
//                U_ILLEGAL_ARGUMENT_ERROR.
// @return        the mapped EMode, or DECOMP_COMPAT in either failure case
01193 inline Normalizer::EMode Normalizer::getNormalizerEMode(
01194                                   UNormalizationMode mode, UErrorCode &status)
01195 {
01196   if (U_SUCCESS(status))
01197   {
01198     switch (mode)
01199     {
01200     case UNORM_NONE :
01201       return Normalizer::NO_OP;
01202     case UNORM_NFD :
01203       return Normalizer::DECOMP;
01204     case UNORM_NFKD :
01205       return Normalizer::DECOMP_COMPAT;
01206     case UNORM_NFC :
01207       return Normalizer::COMPOSE;
01208     case UNORM_NFKC :
01209       return Normalizer::COMPOSE_COMPAT;
01210     case UNORM_FCD:
01211       return Normalizer::FCD;
01212     default : 
      // Unrecognized value: flag it, then fall out to the shared fallback return.
01213       status = U_ILLEGAL_ARGUMENT_ERROR; 
01214     }
01215   }
      // Reached on incoming failure or on an unrecognized mode.
01216   return Normalizer::DECOMP_COMPAT;
01217 }
01218 #endif /* ICU_NORMALIZER_USE_DEPRECATES */
01219 
01220 U_NAMESPACE_END
01221 #endif // NORMLZR_H

Generated on Wed Dec 18 16:49:39 2002 for ICU 2.4 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001