Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uniset.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uchar.h"
00017 
00018 U_NAMESPACE_BEGIN
00019 
00020 class ParsePosition;              // taken by reference in UnicodeSet's private pattern-parsing constructors/methods
00021 class SymbolTable;                // taken by const reference / const pointer in the private pattern-parsing entry points
00022 class TransliterationRuleParser;  // NOTE(review): not referenced anywhere else in this listing - possibly stale; confirm
00023 class TransliterationRule;        // named as a friend of UnicodeSet
00024 class Transliterator;             // named as a friend of UnicodeSet
00025 class TransliteratorParser;       // named as a friend of UnicodeSet
00026 class UVector;                    // UnicodeSet holds a UVector* member (strings)
00027 
00259 class U_COMMON_API UnicodeSet : public UnicodeFilter {
          // UnicodeSet: a UnicodeFilter subclass whose members below operate on single
          // code points (UChar32), code point ranges, and UnicodeString elements.
          // The embedded listing numbers skip ranges (e.g. 00268 -> 00278), so the
          // original API documentation is not part of this view. The notes added here
          // are derived from the declarations alone and are marked where they are
          // assumptions to be confirmed against the implementation.
00260 
          // --- Internal storage (private by default: no access specifier precedes it) ---
          // NOTE(review): HIGH, referenced in the comment on `list`, is not defined
          // in this header listing.
00261     int32_t len; // length of list used; 0 <= len <= capacity
00262     int32_t capacity; // capacity of list
00263     int32_t bufferCapacity; // capacity of buffer
00264     UChar32* list; // MUST be terminated with HIGH
00265     UChar32* buffer; // internal buffer, may be NULL
00266 
          // Pointer to a forward-declared UVector; allocateStrings() below returns a
          // UBool, presumably reporting whether it could be created - confirm.
00267     UVector* strings; // maintained in sorted order
00268 
          // NOTE(review): presumably the pattern text associated with this set;
          // its exact role is not visible here - confirm in the implementation.
00278     UnicodeString pat;
00279 
00280 public:
00281 
          // Declared here, defined out of line. NOTE(review): presumably the lowest
          // and highest code point values a set can hold - confirm.
00286     static const UChar32 MIN_VALUE;
00287 
00292     static const UChar32 MAX_VALUE;
00293 
00294     //----------------------------------------------------------------
00295     // Constructors &c
00296     //----------------------------------------------------------------
00297 
          // (Repeated access specifier; the section is already public.)
00298 public:
00299 
          // Default constructor.
00304     UnicodeSet();
00305 
          // Constructs from a pair of code points. NOTE(review): presumably the
          // inclusive range [start, end] - confirm.
00314     UnicodeSet(UChar32 start, UChar32 end);
00315 
          // Constructs from a pattern string; failures are reported through status.
00324     UnicodeSet(const UnicodeString& pattern,
00325                UErrorCode& status);
00326 
          // Constructs from an 8-bit category value; failures are reported through
          // status. NOTE(review): presumably a Unicode general category - confirm.
00333     UnicodeSet(int8_t category, UErrorCode& status);
00334 
          // Copy constructor, virtual destructor and copy assignment.
00339     UnicodeSet(const UnicodeSet& o);
00340 
00345     virtual ~UnicodeSet();
00346 
00351     UnicodeSet& operator=(const UnicodeSet& o);
00352 
          // Virtual equality. operator!= is non-virtual and is defined inline after
          // the class as the negation of operator==.
00364     virtual UBool operator==(const UnicodeSet& o) const;
00365 
00371     UBool operator!=(const UnicodeSet& o) const;
00372 
          // Returns a UnicodeFunctor*. NOTE(review): presumably a newly allocated
          // copy owned by the caller - confirm.
00379     virtual UnicodeFunctor* clone() const;
00380 
00388     virtual int32_t hashCode(void) const;
00389 
00390     //----------------------------------------------------------------
00391     // Public API
00392     //----------------------------------------------------------------
00393 
          // Mutators in this section return UnicodeSet&. NOTE(review): presumably
          // *this, to allow call chaining - confirm.
00403     UnicodeSet& set(UChar32 start, UChar32 end);
00404 
          // Static check on a pattern string at index pos; needs no instance.
00410     static UBool resemblesPattern(const UnicodeString& pattern,
00411                                   int32_t pos);
00412 
          // Public, virtual pattern entry point. A private overload taking a
          // ParsePosition and a SymbolTable pointer is declared further down.
00422     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00423                                      UErrorCode& status);
00424 
          // Writes into and returns the caller-supplied result string.
          // escapeUnprintable defaults to FALSE.
00437     virtual UnicodeString& toPattern(UnicodeString& result,
00438                                      UBool escapeUnprintable = FALSE) const;
00439 
          // Property-based modifiers; errors are reported through ec. UProperty is
          // declared elsewhere (this header includes unicode/uchar.h).
00461     UnicodeSet& applyIntPropertyValue(UProperty prop,
00462                                       int32_t value,
00463                                       UErrorCode& ec);
00464 
00492     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00493                                    const UnicodeString& value,
00494                                    UErrorCode& ec);
00495 
          // Const queries. Note which overloads are virtual and which are not.
00503     virtual int32_t size(void) const;
00504 
00511     virtual UBool isEmpty(void) const;
00512 
00519     virtual UBool contains(UChar32 c) const;
00520     
00529     virtual UBool contains(UChar32 start, UChar32 end) const;
00530 
00538     UBool contains(const UnicodeString& s) const;
00539     
00547     virtual UBool containsAll(const UnicodeSet& c) const;
00548     
00556     UBool containsAll(const UnicodeString& s) const;
00557     
00566     UBool containsNone(UChar32 start, UChar32 end) const;
00567 
00575     UBool containsNone(const UnicodeSet& c) const;
00576     
00584     UBool containsNone(const UnicodeString& s) const;
00585         
          // The three containsSome overloads are defined inline after the class;
          // each returns the negation of the containsNone overload with the same
          // arguments.
00594     inline UBool containsSome(UChar32 start, UChar32 end) const;
00595         
00603     inline UBool containsSome(const UnicodeSet& s) const;
00604         
00612     inline UBool containsSome(const UnicodeString& s) const;
00613         
          // Non-const, unlike the queries above; offset is passed by non-const
          // reference. Replaceable and UMatchDegree are declared elsewhere.
          // NOTE(review): presumably implements a matcher interface inherited
          // through UnicodeFilter - confirm.
00618     UMatchDegree matches(const Replaceable& text,
00619                          int32_t& offset,
00620                          int32_t limit,
00621                          UBool incremental);
00622 
00623  private:    
          // Private helpers. NOTE(review): presumably used by matches() and the
          // contains/indexOf family respectively - confirm.
00645     static int32_t matchRest(const Replaceable& text,
00646                              int32_t start, int32_t limit,
00647                              const UnicodeString& s);
00648     
00658     int32_t findCodePoint(UChar32 c) const;
00659 
00660  public:
00661 
          // Const: modifies the caller-supplied set, not this one.
00669     void addMatchSetTo(UnicodeSet& toUnionTo) const;
00670 
          // Index <-> code point lookups. NOTE(review): the return values for a
          // missing code point or an out-of-range index are not visible here.
00679     int32_t indexOf(UChar32 c) const;
00680 
00690     UChar32 charAt(int32_t index) const;
00691 
          // add: the range overload is virtual; the single code point and string
          // overloads are non-virtual.
00705     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00706 
00713     UnicodeSet& add(UChar32 c);
00714 
00725     UnicodeSet& add(const UnicodeString& s);
00726 
00727  private:    
          // Private string helpers. NOTE(review): presumably getSingleCP detects
          // strings that are exactly one code point, and _add inserts into
          // `strings` - confirm.
00733     static int32_t getSingleCP(const UnicodeString& s);
00734 
00735     void _add(const UnicodeString& s);
00736     
00737  public:
          // String-argument bulk operations (non-virtual). The UnicodeSet-argument
          // overloads with the same names are declared further down and are virtual.
00745     UnicodeSet& addAll(const UnicodeString& s);
00746 
00754     UnicodeSet& retainAll(const UnicodeString& s);
00755 
00763     UnicodeSet& complementAll(const UnicodeString& s);
00764 
00771     UnicodeSet& removeAll(const UnicodeString& s);
00772 
          // Static factories returning a raw UnicodeSet*. NOTE(review): presumably
          // heap-allocated and owned by the caller - confirm.
00781     static UnicodeSet* createFrom(const UnicodeString& s);
00782 
00783     
00791     static UnicodeSet* createFromAll(const UnicodeString& s);
00792 
          // retain / remove / complement: the range overloads (and the no-argument
          // complement) are virtual; the single code point and string overloads are
          // non-virtual.
00805     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00806 
00807 
00812     UnicodeSet& retain(UChar32 c);
00813 
00826     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00827 
00834     UnicodeSet& remove(UChar32 c);
00835 
00844     UnicodeSet& remove(const UnicodeString& s);
00845 
00852     virtual UnicodeSet& complement(void);
00853 
00867     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00868 
00875     UnicodeSet& complement(UChar32 c);
00876 
00885     UnicodeSet& complement(const UnicodeString& s);
00886 
          // Set-argument bulk operations (all virtual).
00898     virtual UnicodeSet& addAll(const UnicodeSet& c);
00899 
00910     virtual UnicodeSet& retainAll(const UnicodeSet& c);
00911 
00922     virtual UnicodeSet& removeAll(const UnicodeSet& c);
00923 
00933     virtual UnicodeSet& complementAll(const UnicodeSet& c);
00934 
00940     virtual UnicodeSet& clear(void);
00941 
          // Range accessors by index. NOTE(review): presumably valid indices are
          // 0 .. getRangeCount()-1 - confirm.
00949     virtual int32_t getRangeCount(void) const;
00950 
00958     virtual UChar32 getRangeStart(int32_t index) const;
00959 
00967     virtual UChar32 getRangeEnd(int32_t index) const;
00968 
          // Writes into a caller-supplied array of destCapacity uint16_t units;
          // errors are reported through ec. NOTE(review): the meaning of the
          // returned int32_t is not visible here.
01017     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01018 
01024     virtual UnicodeSet& compact();
01025 
          // Class identification: the static ID is the address of the class-wide
          // fgClassID byte (declared below) cast to UClassID; the virtual dynamic
          // ID forwards to the static one.
01037     static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
01038 
01047     virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
01048 
01049 private:
01050 
01051     // Private API for the USet API
01052 
          // USetAccess is granted friendship so it can reach the two private
          // accessors that follow.
01053     friend class USetAccess;
01054 
01055     int32_t getStringCount() const;
01056 
01057     const UnicodeString* getString(int32_t index) const;
01058 
01059 private:
01060 
          // Only its address is used (see getStaticClassID above).
01061     static const char fgClassID;
01062 
01063     //----------------------------------------------------------------
01064     // RuleBasedTransliterator support
01065     //----------------------------------------------------------------
01066 
          // Friends; everything from here to the end of the class is private, so
          // only these classes (and UnicodeSet itself) can use it.
01067     friend class Transliterator;
01068     friend class TransliteratorParser;
01069     friend class TransliteratorIDParser;
01070     friend class TransliterationRule;
01071 
01072     friend class RBBIRuleScanner;
01073     friend class RegexCompile;
01074 
          // Private parsing constructors: take the pattern plus a ParsePosition by
          // non-const reference; the first also takes a SymbolTable by const
          // reference. NOTE(review): presumably pos is advanced past the parsed
          // text - confirm.
01093     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01094                const SymbolTable& symbols,
01095                UErrorCode& status);
01096 
01102     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01103                UErrorCode& status);
01104 
          // NOTE(review): presumably overrides a UnicodeFilter/UnicodeMatcher
          // virtual - confirm in the base class.
01110     virtual UBool matchesIndexValue(uint8_t v) const;
01111 
01112 private:
01113 
01114     //----------------------------------------------------------------
01115     // Implementation: Pattern parsing
01116     //----------------------------------------------------------------
01117 
          // Private overload of applyPattern. Unlike the parsing constructor above,
          // symbols is a pointer here. NOTE(review): presumably may be NULL - confirm.
01142     void applyPattern(const UnicodeString& pattern,
01143                       ParsePosition& pos,
01144                       const SymbolTable* symbols,
01145                       UErrorCode& status);
01146 
01147     //----------------------------------------------------------------
01148     // Implementation: Utility methods
01149     //----------------------------------------------------------------
01150 
          // NOTE(review): presumably these manage `list`/`capacity` and
          // `buffer`/`bufferCapacity` respectively, and swapBuffers exchanges the
          // two arrays - confirm.
01151     void ensureCapacity(int32_t newLen);
01152 
01153     void ensureBufferCapacity(int32_t newLen);
01154 
01155     void swapBuffers(void);
01156 
01157     UBool allocateStrings();
01158 
          // Takes an extra caller-supplied rebuiltPat string by non-const reference.
01159     void _applyPattern(const UnicodeString& pattern,
01160                        ParsePosition& pos,
01161                        const SymbolTable* symbols,
01162                        UnicodeString& rebuiltPat,
01163                        UErrorCode& status);
01164 
          // Pattern generation helpers; both write into and return `result`.
01165     UnicodeString& _toPattern(UnicodeString& result,
01166                               UBool escapeUnprintable) const;
01167 
01168     UnicodeString& _generatePattern(UnicodeString& result,
01169                                     UBool escapeUnprintable) const;
01170 
          // Static append helpers, overloaded for a string and a single code point.
01171     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01172 
01173     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01174 
01175     //----------------------------------------------------------------
01176     // Implementation: Fundamental operators
01177     //----------------------------------------------------------------
01178 
          // Raw-array operators: each takes another UChar32 array, its length and
          // an int8_t polarity. NOTE(review): the polarity encoding is not visible
          // in this header.
01179     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01180 
01181     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01182 
01183     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01184 
          // Property-pattern counterparts of resemblesPattern/applyPattern.
01190     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01191                                           int32_t pos);
01192 
01231     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01232                                      ParsePosition& ppos,
01233                                      UErrorCode &ec);
01234 
          // Callback type: a plain function pointer receiving a code point and an
          // opaque context pointer, returning UBool.
01239     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01240 
          // Takes a Filter callback and the context pointer to hand to it; errors
          // are reported through status. NOTE(review): presumably keeps the code
          // points for which the filter returns true - confirm.
01249     void applyFilter(Filter filter,
01250                      void* context,
01251                      UErrorCode &status);
01252 
          // Returns a pointer to a const set. NOTE(review): ownership and lifetime
          // of the returned object are not visible here.
01257     static const UnicodeSet* getInclusions();
01258 
01259     friend class UnicodeSetIterator;
01260 };
01261 
01262 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Inequality is the logical negation of the (virtual) operator==.
01263     return !(this->operator==(o));
01264 }
01265 
01266 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // "Some" is the negation of "none" for the same pair of code points.
01267     return !(this->containsNone(start, end));
01268 }
01269 
01270 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // "Some" is the negation of "none" against the same set.
01271     return !(this->containsNone(s));
01272 }
01273 
01274 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // "Some" is the negation of "none" against the same string.
01275     return !(this->containsNone(s));
01276 }
01277 
01278 U_NAMESPACE_END
01279 
01280 #endif

Generated on Wed Dec 18 16:49:57 2002 for ICU 2.4 by doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001