00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef REGEX_H
00014 #define REGEX_H
00015
00016
00036 #include "unicode/utypes.h"
00037
00038 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00039
00040 #include "unicode/uobject.h"
00041 #include "unicode/unistr.h"
00042 #include "unicode/parseerr.h"
00043
00044 U_NAMESPACE_BEGIN
00045
00046
00047 class RegexMatcher;
00048 class UVector;
00049 class UStack;
00050 class UnicodeSet;
00051
00052
/**
 * Option flags for regular expressions.
 *
 * Each constant is a distinct single bit, so flags can be OR-ed together
 * into one integer.  Bit values 1, 16 and 64 are not assigned here.
 *
 * NOTE(review): the per-flag descriptions below follow ICU's published
 * regular-expression flag documentation; the original doc comments were
 * missing from this copy of the header, so confirm them against the ICU
 * release actually in use.  Presumably these are the values accepted by the
 * `flags` argument of RegexPattern::compile().
 */
enum {
    /** Request matching under canonical equivalence. */
    UREGEX_CANON_EQ         = 128,

    /** Request case-insensitive matching. */
    UREGEX_CASE_INSENSITIVE = 2,

    /** Allow white space and comments inside the pattern text. */
    UREGEX_COMMENTS         = 4,

    /** Let '.' match line terminators as well as other characters. */
    UREGEX_DOTALL           = 32,

    /** Let '^' and '$' match at line boundaries inside the input. */
    UREGEX_MULTILINE        = 8
};
00074
00075
00076
00088 class U_I18N_API RegexPattern: public UObject {
00089 public:
00090
00098 RegexPattern();
00099
00100
00106 RegexPattern(const RegexPattern &source);
00107
00113 virtual ~RegexPattern();
00114
00123 UBool operator==(const RegexPattern& that) const;
00124
00133 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00134
00140 RegexPattern &operator =(const RegexPattern &source);
00141
00149 virtual RegexPattern *clone() const;
00150
00151
00172 static RegexPattern *compile( const UnicodeString ®ex,
00173 UParseError &pe,
00174 UErrorCode &status);
00175
00196 static RegexPattern *compile( const UnicodeString ®ex,
00197 uint32_t flags,
00198 UParseError &pe,
00199 UErrorCode &status);
00200
00201
00207 virtual uint32_t flags() const;
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221 virtual RegexMatcher *matcher(const UnicodeString &input,
00222 UErrorCode &status) const;
00223
00224
00239 static UBool matches(const UnicodeString ®ex,
00240 const UnicodeString &input,
00241 UParseError &pe,
00242 UErrorCode &status);
00243
00244
00249 virtual UnicodeString pattern() const;
00250
00251
00270 virtual int32_t split(const UnicodeString &input,
00271 UnicodeString dest[],
00272 int32_t destCapacity,
00273 UErrorCode &status) const;
00274
00275
00276
00281 void dump() const;
00282
00288 virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00289
00295 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00296
00297 private:
00298
00299
00300
00301 UnicodeString fPattern;
00302 uint32_t fFlags;
00303
00304 UVector *fCompiledPat;
00305 UnicodeString fLiteralText;
00306
00307 UVector *fSets;
00308 UBool fBadState;
00309
00310
00311 RegexMatcher *fMatcher;
00312
00313
00314
00315 int32_t fNumCaptureGroups;
00316 int32_t fMaxCaptureDigits;
00317
00318 UnicodeSet **fStaticSets;
00319
00320
00325 static const char fgClassID;
00326
00327 friend class RegexCompile;
00328 friend class RegexMatcher;
00329
00330
00331
00332
00333 void init();
00334 void zap();
00335 void dumpOp(int32_t index) const;
00336
00337
00338
00339 };
00340
00341
00342
00343
00344
00345
00346
00347
00348
00358 class U_I18N_API RegexMatcher: public UObject {
00359 public:
00366 virtual ~RegexMatcher();
00367
00368
00375 virtual UBool matches(UErrorCode &status);
00376
00377
00378
00391 virtual UBool lookingAt(UErrorCode &status);
00392
00393
00406 virtual UBool find();
00407
00408
00418 virtual UBool find(int32_t start, UErrorCode &status);
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430 virtual UnicodeString group(UErrorCode &status) const;
00431
00432
00445 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00446
00447
00453 virtual int32_t groupCount() const;
00454
00455
00463 virtual int32_t start(UErrorCode &status) const;
00464
00465
00479 virtual int32_t start(int group, UErrorCode &status) const;
00480
00481
00491 virtual int32_t end(UErrorCode &status) const;
00492
00493
00507 virtual int32_t end(int group, UErrorCode &status) const;
00508
00509
00518 virtual RegexMatcher &reset();
00519
00520
00528 virtual RegexMatcher &reset(const UnicodeString &input);
00529
00530
00537 virtual const UnicodeString &input() const;
00538
00539
00545 virtual const RegexPattern &pattern() const;
00546
00547
00564 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
00565
00566
00587 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
00588
00616 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00617 const UnicodeString &replacement, UErrorCode &status);
00618
00619
00630 virtual UnicodeString &appendTail(UnicodeString &dest);
00631
00632
00638 virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00639
00645 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00646
00647 private:
00648
00649
00650
00651 RegexMatcher(const RegexPattern *pat);
00652 RegexMatcher(const RegexMatcher &other);
00653 RegexMatcher &operator =(const RegexMatcher &rhs);
00654 friend class RegexPattern;
00655
00656
00657
00658
00659
00660
00661 void MatchAt(int32_t startIdx, UErrorCode &status);
00662 inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
00663 UBool isWordBoundary(int32_t pos);
00664
00665
00666 const RegexPattern *fPattern;
00667 const UnicodeString *fInput;
00668 int32_t fInputLength;
00669 UBool fMatch;
00670 int32_t fMatchStart;
00671 int32_t fMatchEnd;
00672 int32_t fLastMatchEnd;
00673 UStack *fBackTrackStack;
00674 UVector *fCaptureStarts;
00675 UVector *fCaptureEnds;
00676
00681 static const char fgClassID;
00682
00683
00684 };
00685
00686 U_NAMESPACE_END
00687 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00688 #endif