00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
#ifndef REGEX_H
00017
#define REGEX_H
00018
00019
00020
00040
#include "unicode/utypes.h"
00041
00042
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
00043
00044
#include "unicode/uobject.h"
00045
#include "unicode/unistr.h"
00046
#include "unicode/parseerr.h"
00047
00048
#include "unicode/uregex.h"
00049
00050
U_NAMESPACE_BEGIN
00051
00052
00053
00054
00055
// Forward declarations of types that this header only refers to by pointer,
// reference, or friend declaration.  Their full definitions are not needed
// (and are not visible) here.
class RegexMatcher;
class RegexPattern;
class UVector;
class UVector32;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
class RuleBasedBreakIterator;
class RegexCImpl;
00064
00065
00066
00067
00072
/**
 * RegexPatternDump: debugging hook that takes a pointer to a RegexPattern.
 *
 * When REGEX_DEBUG is defined this is a real function declared with internal
 * visibility (U_INTERNAL); its definition lives outside this header.
 * When REGEX_DEBUG is not defined, RegexPatternDump(pat) is a function-like
 * macro that expands to nothing, so call sites compile away entirely and the
 * argument expression is never evaluated.
 */
#ifdef REGEX_DEBUG
U_INTERNAL void U_EXPORT2
    RegexPatternDump(const RegexPattern *pat);
#else
#define RegexPatternDump(pat)
#endif
00078
00079
00080
00092
/**
 * RegexPattern: the pattern-side class of the regular expression API declared
 * in this header.  RegexMatcher (declared later in this header) is its
 * companion class; the two are mutual friends.
 *
 * Only declarations are visible in this header.  Notes below that go beyond
 * what the signatures themselves show are phrased as assumptions and should be
 * confirmed against the implementation.
 */
class U_I18N_API RegexPattern: public UObject {
public:

    /** Default constructor. */
    RegexPattern();

    /**
     * Copy constructor.
     * @param source  the RegexPattern to copy from.
     */
    RegexPattern(const RegexPattern &source);

    /** Destructor.  Virtual, so objects may be deleted through a base-class pointer. */
    virtual ~RegexPattern();

    /**
     * Equality comparison.
     * @param that  the RegexPattern to compare with this one.
     * @return      UBool result of the comparison (the exact equality criteria
     *              are defined by the implementation, not visible here).
     */
    UBool           operator==(const RegexPattern& that) const;

    /**
     * Inequality comparison.  Defined inline as the logical negation of
     * operator==, so the two can never disagree.
     * @param that  the RegexPattern to compare with this one.
     */
    inline UBool    operator!=(const RegexPattern& that) const {return ! operator ==(that);}

    /**
     * Assignment operator.
     * @param source  the RegexPattern to assign from.
     * @return        a reference to a RegexPattern (presumably *this -- confirm).
     */
    RegexPattern  &operator =(const RegexPattern &source);

    /**
     * Polymorphic copy.
     * @return  pointer to a RegexPattern.
     *          NOTE(review): presumably a new heap object owned by the caller -- confirm.
     */
    virtual RegexPattern  *clone() const;

    /**
     * Static factory taking the pattern text only.
     * @param regex   the regular expression, as a UnicodeString.
     * @param pe      parse-error structure (presumably filled in when the
     *                pattern text is rejected -- confirm).
     * @param status  ICU error code, passed by reference.
     * @return        pointer to a RegexPattern.
     *                NOTE(review): ownership and the value returned on failure
     *                are not visible here -- confirm.
     */
    static RegexPattern *compile( const UnicodeString &regex,
        UParseError          &pe,
        UErrorCode           &status);

    /**
     * Static factory taking the pattern text plus a flags word.
     * @param regex   the regular expression, as a UnicodeString.
     * @param flags   option bits (presumably the flag constants from
     *                unicode/uregex.h, which this header includes -- confirm).
     * @param pe      parse-error structure; see the three-argument overload.
     * @param status  ICU error code, passed by reference.
     * @return        pointer to a RegexPattern; see the note on the overload above.
     */
    static RegexPattern *compile( const UnicodeString &regex,
        uint32_t             flags,
        UParseError          &pe,
        UErrorCode           &status);

    /**
     * Static factory taking the pattern text and flags, without a UParseError.
     * @param regex   the regular expression, as a UnicodeString.
     * @param flags   option bits; see the four-argument overload.
     * @param status  ICU error code, passed by reference.
     * @return        pointer to a RegexPattern; see the note on the overload above.
     */
    static RegexPattern *compile( const UnicodeString &regex,
        uint32_t             flags,
        UErrorCode           &status);

    /**
     * @return  a uint32_t flags word (presumably the flags the pattern was
     *          compiled with, cf. the fFlags member -- confirm).
     */
    virtual uint32_t flags() const;

    /**
     * Obtain a RegexMatcher for this pattern together with an input string.
     * @param input   the text to be matched against.
     * @param status  ICU error code, passed by reference.
     * @return        pointer to a RegexMatcher.
     *                NOTE(review): presumably caller-owned -- confirm.
     */
    virtual RegexMatcher *matcher(const UnicodeString &input,
        UErrorCode          &status) const;

    /**
     * Obtain a RegexMatcher for this pattern with no input string supplied.
     * @param status  ICU error code, passed by reference.
     * @return        pointer to a RegexMatcher.
     *                NOTE(review): presumably caller-owned -- confirm.
     */
    virtual RegexMatcher *matcher(UErrorCode  &status) const;

    /**
     * Static one-shot convenience taking both a pattern and an input string.
     * @param regex   the regular expression, as a UnicodeString.
     * @param input   the text to test.
     * @param pe      parse-error structure for the pattern.
     * @param status  ICU error code, passed by reference.
     * @return        UBool match result.
     *                NOTE(review): whether this is a whole-input match or a
     *                partial match is not visible here -- confirm.
     */
    static UBool matches(const UnicodeString   &regex,
        const UnicodeString   &input,
        UParseError     &pe,
        UErrorCode      &status);

    /**
     * @return  a UnicodeString, by value (presumably the source pattern text,
     *          cf. the fPattern member -- confirm).
     */
    virtual UnicodeString pattern() const;

    /**
     * Split an input string into fields.
     * @param input         the string to split.
     * @param dest          caller-supplied array of UnicodeString receiving output.
     * @param destCapacity  number of elements available in dest.
     * @param status        ICU error code, passed by reference.
     * @return              an int32_t (presumably the number of fields written
     *                      to dest -- confirm).
     */
    virtual int32_t  split(const UnicodeString &input,
        UnicodeString    dest[],
        int32_t          destCapacity,
        UErrorCode       &status) const;

    /** Class ID of this object's dynamic type (UObject-style run-time type identification). */
    virtual UClassID getDynamicClassID() const;

    /** Class ID for the RegexPattern class itself (static counterpart of getDynamicClassID). */
    static UClassID getStaticClassID();

private:
    //
    //  Implementation data.  Member order is preserved exactly as declared.
    //  The descriptions are inferred from the member names and types only;
    //  treat each as an assumption to confirm against the implementation.
    //
    UnicodeString   fPattern;            // presumably the original pattern string
    uint32_t        fFlags;              // presumably the compile-time flags

    UVector32       *fCompiledPat;       // presumably the compiled form of the pattern
    UnicodeString   fLiteralText;        // presumably literal text extracted from the pattern

    UVector         *fSets;              // presumably sets referenced by the pattern
    Regex8BitSet    *fSets8;             // presumably 8-bit counterparts of fSets

    UErrorCode      fDeferredStatus;     // presumably an error remembered from an earlier operation

    int32_t         fMinMatchLen;        // presumably a lower bound on match length

    int32_t         fFrameSize;          // presumably the size of one REStackFrame for this pattern

    int32_t         fDataSize;           // presumably the size of per-matcher data (cf. RegexMatcher::fData)

    UVector32       *fGroupMap;          // presumably maps capture-group numbers to frame positions

    int32_t         fMaxCaptureDigits;

    UnicodeSet     **fStaticSets;        // presumably shared, predefined sets
    Regex8BitSet   *fStaticSets8;        // presumably 8-bit counterparts of fStaticSets

    int32_t         fStartType;          // presumably describes how a match may begin
    int32_t         fInitialStringIdx;
    int32_t         fInitialStringLen;
    UnicodeSet     *fInitialChars;
    UChar32         fInitialChar;
    Regex8BitSet   *fInitialChars8;

    // These classes are granted access to the private members above.
    friend class RegexCompile;
    friend class RegexMatcher;
    friend class RegexCImpl;

    //
    //  Implementation methods
    //
    void        init();                  // presumably shared constructor initialization -- confirm
    void        zap();                   // presumably releases owned storage -- confirm
#ifdef REGEX_DEBUG
    // Debug-only helpers, compiled in only when REGEX_DEBUG is defined.
    void        dumpOp(int32_t index) const;
    friend void RegexPatternDump(const RegexPattern *);
#endif

};
00400
00401
00402
00412 class U_I18N_API RegexMatcher: public
UObject {
00413
public:
00414
00429 RegexMatcher(
const UnicodeString ®exp,
uint32_t flags,
UErrorCode &status);
00430
00446 RegexMatcher(
const UnicodeString ®exp,
const UnicodeString &input,
00447
uint32_t flags,
UErrorCode &status);
00448
00449
00455
virtual ~RegexMatcher();
00456
00457
00464
virtual UBool matches(
UErrorCode &status);
00465
00474
virtual UBool matches(
int32_t startIndex,
UErrorCode &status);
00475
00476
00477
00478
00491
virtual UBool lookingAt(
UErrorCode &status);
00492
00493
00507
virtual UBool lookingAt(
int32_t startIndex,
UErrorCode &status);
00508
00521
virtual UBool find();
00522
00523
00533
virtual UBool find(
int32_t start,
UErrorCode &status);
00534
00535
00545
virtual UnicodeString group(
UErrorCode &status)
const;
00546
00547
00560
virtual UnicodeString group(
int32_t groupNum,
UErrorCode &status)
const;
00561
00562
00568
virtual int32_t groupCount()
const;
00569
00570
00578
virtual int32_t start(
UErrorCode &status)
const;
00579
00580
00594
virtual int32_t start(
int group,
UErrorCode &status)
const;
00595
00596
00606
virtual int32_t end(
UErrorCode &status)
const;
00607
00608
00622
virtual int32_t end(
int group,
UErrorCode &status)
const;
00623
00624
00633
virtual RegexMatcher &reset();
00634
00635
00645
virtual RegexMatcher &reset(
int32_t index,
UErrorCode &status);
00646
00647
00655
virtual RegexMatcher &reset(
const UnicodeString &input);
00656
00657
00664
virtual const UnicodeString &input()
const;
00665
00666
00672
virtual const RegexPattern &pattern()
const;
00673
00674
00691
virtual UnicodeString replaceAll(
const UnicodeString &replacement,
UErrorCode &status);
00692
00693
00714
virtual UnicodeString replaceFirst(
const UnicodeString &replacement,
UErrorCode &status);
00715
00743
virtual RegexMatcher &appendReplacement(
UnicodeString &dest,
00744
const UnicodeString &replacement,
UErrorCode &status);
00745
00746
00757
virtual UnicodeString &appendTail(
UnicodeString &dest);
00758
00759
00760
00785
virtual int32_t split(
const UnicodeString &input,
00786
UnicodeString dest[],
00787
int32_t destCapacity,
00788
UErrorCode &status);
00789
00790
00791
00797
void setTrace(
UBool state);
00798
00799
00805
static UClassID getStaticClassID();
00806
00812
virtual UClassID getDynamicClassID()
const;
00813
00814
private:
00815
00816
00817 RegexMatcher();
00818 RegexMatcher(
const RegexPattern *pat);
00819 RegexMatcher(
const RegexMatcher &other);
00820 RegexMatcher &operator =(
const RegexMatcher &rhs);
00821
friend class RegexPattern;
00822
friend class RegexCImpl;
00823
00824
00825
00826
00827
00828
00829
void MatchAt(
int32_t startIdx,
UErrorCode &status);
00830
inline void backTrack(
int32_t &inputIdx,
int32_t &patIdx);
00831
UBool isWordBoundary(
int32_t pos);
00832
UBool isUWordBoundary(
int32_t pos);
00833 REStackFrame *resetStack();
00834
inline REStackFrame *StateSave(REStackFrame *fp,
int32_t savePatIdx,
00835
int32_t frameSize,
UErrorCode &status);
00836
00837
00838
const RegexPattern *fPattern;
00839 RegexPattern *fPatternOwned;
00840
00841
const UnicodeString *fInput;
00842
00843
UBool fMatch;
00844
int32_t fMatchStart;
00845
int32_t fMatchEnd;
00846
int32_t fLastMatchEnd;
00847
00848 UVector32 *fStack;
00849 REStackFrame *fFrame;
00850
00851
00852
00853
int32_t *fData;
00854
int32_t fSmallData[8];
00855
00856
UBool fTraceDebug;
00857
00858
UErrorCode fDeferredStatus;
00859
00860
00861
RuleBasedBreakIterator *fWordBreakItr;
00862
00863
00864 };
00865
00866
U_NAMESPACE_END
00867
#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00868
#endif