00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef UNICODE_H
00028 #define UNICODE_H
00029
00030 #include "unicode/utypes.h"
00031 #include "unicode/uchar.h"
00032
00033 U_NAMESPACE_BEGIN
00055 class U_COMMON_API Unicode
00056 {
00057 public:
00058
00059
00060
00061
00062
00063
00064
00065 enum {
00067 MIN_VALUE=0,
00068
00074 MAX_VALUE=0x10ffff,
00075
00083 MAX_CHAR_LENGTH=UTF_MAX_CHAR_LENGTH,
00084
00095 MIN_RADIX=2,
00096
00107 MAX_RADIX=36
00108 };
00109
00116 enum EUnicodeGeneralTypes
00117 {
00118 UNASSIGNED = 0,
00119 UPPERCASE_LETTER = 1,
00120 LOWERCASE_LETTER = 2,
00121 TITLECASE_LETTER = 3,
00122 MODIFIER_LETTER = 4,
00123 OTHER_LETTER = 5,
00124 NON_SPACING_MARK = 6,
00125 ENCLOSING_MARK = 7,
00126 COMBINING_SPACING_MARK = 8,
00127 DECIMAL_DIGIT_NUMBER = 9,
00128 LETTER_NUMBER = 10,
00129 OTHER_NUMBER = 11,
00130 SPACE_SEPARATOR = 12,
00131 LINE_SEPARATOR = 13,
00132 PARAGRAPH_SEPARATOR = 14,
00133 CONTROL = 15,
00134 FORMAT = 16,
00135 PRIVATE_USE = 17,
00136 SURROGATE = 18,
00137 DASH_PUNCTUATION = 19,
00138 START_PUNCTUATION = 20,
00139 END_PUNCTUATION = 21,
00140 CONNECTOR_PUNCTUATION = 22,
00141 OTHER_PUNCTUATION = 23,
00142 MATH_SYMBOL = 24,
00143 CURRENCY_SYMBOL = 25,
00144 MODIFIER_SYMBOL = 26,
00145 OTHER_SYMBOL = 27,
00146 INITIAL_PUNCTUATION = 28,
00147 FINAL_PUNCTUATION = 29,
00148 GENERAL_TYPES_COUNT = 30
00149 };
00150
00151
00157 enum EUnicodeScript
00158 {
00159 kBasicLatin=UBLOCK_BASIC_LATIN,
00160 kLatin1Supplement,
00161 kLatinExtendedA,
00162 kLatinExtendedB,
00163 kIPAExtension,
00164 kSpacingModifier,
00165 kCombiningDiacritical,
00166 kGreek,
00167 kCyrillic,
00168 kArmenian,
00169 kHebrew,
00170 kArabic,
00171 kSyriac,
00172 kThaana,
00173 kDevanagari,
00174 kBengali,
00175 kGurmukhi,
00176 kGujarati,
00177 kOriya,
00178 kTamil,
00179 kTelugu,
00180 kKannada,
00181 kMalayalam,
00182 kSinhala,
00183 kThai,
00184 kLao,
00185 kTibetan,
00186 kMyanmar,
00187 kGeorgian,
00188 kHangulJamo,
00189 kEthiopic,
00190 kCherokee,
00191 kUnifiedCanadianAboriginalSyllabics,
00192 kogham,
00193 kRunic,
00194 kKhmer,
00195 kMongolian,
00196 kLatinExtendedAdditional,
00197 kGreekExtended,
00198 kGeneralPunctuation,
00199 kSuperSubScript,
00200 kCurrencySymbolScript,
00201 kSymbolCombiningMark,
00202 kLetterlikeSymbol,
00203 kNumberForm,
00204 kArrow,
00205 kMathOperator,
00206 kMiscTechnical,
00207 kControlPicture,
00208 kOpticalCharacter,
00209 kEnclosedAlphanumeric,
00210 kBoxDrawing,
00211 kBlockElement,
00212 kGeometricShape,
00213 kMiscSymbol,
00214 kDingbat,
00215 kBraillePatterns,
00216 kCJKRadicalsSupplement,
00217 kKangxiRadicals,
00218 kIdeographicDescriptionCharacters,
00219 kCJKSymbolPunctuation,
00220 kHiragana,
00221 kKatakana,
00222 kBopomofo,
00223 kHangulCompatibilityJamo,
00224 kKanbun,
00225 kBopomofoExtended,
00226 kEnclosedCJKLetterMonth,
00227 kCJKCompatibility,
00228 kCJKUnifiedIdeographExtensionA,
00229 kCJKUnifiedIdeograph,
00230 kYiSyllables,
00231 kYiRadicals,
00232 kHangulSyllable,
00233 kHighSurrogate,
00234 kHighPrivateUseSurrogate,
00235 kLowSurrogate,
00236 kPrivateUse,
00237 kCJKCompatibilityIdeograph,
00238 kAlphabeticPresentation,
00239 kArabicPresentationA,
00240 kCombiningHalfMark,
00241 kCJKCompatibilityForm,
00242 kSmallFormVariant,
00243 kArabicPresentationB,
00244 kNoScript,
00245 kHalfwidthFullwidthForm,
00246 kScriptCount=UBLOCK_COUNT
00247 };
00248
00254 enum EDirectionProperty {
00255 LEFT_TO_RIGHT = 0,
00256 RIGHT_TO_LEFT = 1,
00257 EUROPEAN_NUMBER = 2,
00258 EUROPEAN_NUMBER_SEPARATOR = 3,
00259 EUROPEAN_NUMBER_TERMINATOR = 4,
00260 ARABIC_NUMBER = 5,
00261 COMMON_NUMBER_SEPARATOR = 6,
00262 BLOCK_SEPARATOR = 7,
00263 SEGMENT_SEPARATOR = 8,
00264 WHITE_SPACE_NEUTRAL = 9,
00265 OTHER_NEUTRAL = 10,
00266 LEFT_TO_RIGHT_EMBEDDING = 11,
00267 LEFT_TO_RIGHT_OVERRIDE = 12,
00268 RIGHT_TO_LEFT_ARABIC = 13,
00269 RIGHT_TO_LEFT_EMBEDDING = 14,
00270 RIGHT_TO_LEFT_OVERRIDE = 15,
00271 POP_DIRECTIONAL_FORMAT = 16,
00272 DIR_NON_SPACING_MARK = 17,
00273 BOUNDARY_NEUTRAL = 18
00274 };
00275
00282 enum ECellWidths
00283 {
00284 ZERO_WIDTH = 0,
00285 HALF_WIDTH = 1,
00286 FULL_WIDTH = 2,
00287 NEUTRAL = 3
00288 };
00289
00301 static inline UBool isSingle(UChar c);
00302
00312 static inline UBool isLead(UChar c);
00313
00323 static inline UBool isTrail(UChar c);
00324
00336 static inline UBool isSurrogate(UChar32 c);
00337
00351 static inline UBool isUnicodeChar(UChar32 c);
00352
00365 static inline UBool isError(UChar32 c);
00366
00377 static inline UBool isValid(UChar32 c);
00378
00391 static inline UBool needMultipleUChar(UChar32 c);
00392
00402 static inline int32_t charLength(UChar32 c);
00403
00418 static inline int32_t arraySize(int32_t size);
00419
00433 static inline UBool isLowerCase(UChar32 ch);
00434
00447 static inline UBool isUpperCase(UChar32 ch);
00448
00461 static inline UBool isTitleCase(UChar32 ch);
00462
00475 static inline UBool isDigit(UChar32 ch);
00476
00493 static inline UBool isDefined(UChar32 ch);
00494
00506 static inline UBool isControl(UChar32 ch);
00507
00519 static inline UBool isPrintable(UChar32 ch);
00520
00533 static inline UBool isBaseForm(UChar32 ch);
00534
00551 static inline UBool isLetter(UChar32 ch);
00552
00574 static inline UBool isJavaIdentifierStart(UChar32 ch);
00575
00605 static inline UBool isJavaIdentifierPart(UChar32 ch);
00606
00622 static inline UBool isUnicodeIdentifierStart(UChar32 ch);
00623
00651 static inline UBool isUnicodeIdentifierPart(UChar32 ch);
00652
00679 static inline UBool isIdentifierIgnorable(UChar32 ch);
00680
00706 static inline UChar32 toLowerCase(UChar32 ch);
00707
00730 static inline UChar32 toUpperCase(UChar32 ch);
00731
00750 static inline UChar32 toTitleCase(UChar32 ch);
00751
00766 static inline UChar32
00767 foldCase(UChar32 c, uint32_t options);
00768
00778 static inline UBool isSpaceChar(UChar32 ch);
00779
00809 static inline UBool isWhitespace(UChar32 ch);
00810
00846 static inline int8_t getType(UChar32 ch);
00847
00856 static inline uint8_t getCombiningClass(UChar32 c);
00857
00870 static inline EDirectionProperty characterDirection(UChar32 ch);
00871
00883 static inline UBool isMirrored(UChar32 c);
00884
00902 static inline UChar32 charMirror(UChar32 c);
00903
00911 static inline EUnicodeScript getScript(UChar32 ch);
00912
00966 static inline uint16_t getCellWidth(UChar32 ch);
00967
00996 static inline int32_t
00997 getCharName(uint32_t code,
00998 char *buffer, int32_t bufferLength,
00999 UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
01000
01012 static inline int32_t digitValue(UChar32 ch);
01013
01052 static inline int32_t digit(UChar32 ch, int8_t radix);
01053
01082 static inline UChar32 forDigit(int32_t digit, int8_t radix);
01083
01090 static void getUnicodeVersion(UVersionInfo info);
01091
01092 protected:
01093
01094
01095
01096
01097
01098 Unicode();
01099
01100
01101
01102 Unicode(const Unicode &other);
01103 ~Unicode();
01104
01105
01106
01107
01108 const Unicode &operator=(const Unicode &other);
01109 };
01110
01111
01112
01113 inline UBool
01114 Unicode::isSingle(UChar c) {
01115 return UTF_IS_SINGLE(c);
01116 }
01117
01118 inline UBool
01119 Unicode::isLead(UChar c) {
01120 return UTF_IS_LEAD(c);
01121 }
01122
01123 inline UBool
01124 Unicode::isTrail(UChar c) {
01125 return UTF_IS_TRAIL(c);
01126 }
01127
01128 inline UBool
01129 Unicode::isSurrogate(UChar32 c) {
01130 return UTF_IS_SURROGATE(c);
01131 }
01132
01133 inline UBool
01134 Unicode::isUnicodeChar(UChar32 c) {
01135 return UTF_IS_UNICODE_CHAR(c);
01136 }
01137
01138 inline UBool
01139 Unicode::isError(UChar32 c) {
01140 return UTF_IS_ERROR(c);
01141 }
01142
01143 inline UBool
01144 Unicode::isValid(UChar32 c) {
01145 return UTF_IS_VALID(c);
01146 }
01147
01148 inline UBool
01149 Unicode::needMultipleUChar(UChar32 c) {
01150 return UTF_NEED_MULTIPLE_UCHAR(c);
01151 }
01152
01153 inline int32_t
01154 Unicode::charLength(UChar32 c) {
01155 return UTF_CHAR_LENGTH(c);
01156 }
01157
01158 inline int32_t
01159 Unicode::arraySize(int32_t size) {
01160 return UTF_ARRAY_SIZE(size);
01161 }
01162
01163
01164 inline UBool
01165 Unicode::isLowerCase(UChar32 ch) {
01166 return u_islower(ch);
01167 }
01168
01169
01170 inline UBool
01171 Unicode::isUpperCase(UChar32 ch) {
01172 return u_isupper(ch);
01173 }
01174
01175
01176 inline UBool
01177 Unicode::isTitleCase(UChar32 ch) {
01178 return u_istitle(ch);
01179 }
01180
01181
01182 inline UBool
01183 Unicode::isDigit(UChar32 ch) {
01184 return u_isdigit(ch);
01185 }
01186
01187
01188 inline UBool
01189 Unicode::isDefined(UChar32 ch) {
01190 return u_isdefined(ch);
01191 }
01192
01193
01194 inline UBool
01195 Unicode::isControl(UChar32 ch) {
01196 return u_iscntrl(ch);
01197 }
01198
01199
01200 inline UBool
01201 Unicode::isPrintable(UChar32 ch) {
01202 return u_isprint(ch);
01203 }
01204
01205
01206 inline UBool
01207 Unicode::isBaseForm(UChar32 ch) {
01208 return u_isbase(ch);
01209 }
01210
01211
01212 inline UBool
01213 Unicode::isLetter(UChar32 ch) {
01214 return u_isalpha(ch);
01215 }
01216
01217
01218 inline UBool
01219 Unicode::isJavaIdentifierStart(UChar32 ch) {
01220 return u_isJavaIDStart(ch);
01221 }
01222
01223
01224
01225 inline UBool
01226 Unicode::isJavaIdentifierPart(UChar32 ch) {
01227 return u_isJavaIDPart(ch);
01228 }
01229
01230
01231 inline UBool
01232 Unicode::isUnicodeIdentifierStart(UChar32 ch) {
01233 return u_isIDStart(ch);
01234 }
01235
01236
01237
01238 inline UBool
01239 Unicode::isUnicodeIdentifierPart(UChar32 ch) {
01240 return u_isIDPart(ch);
01241 }
01242
01243
01244 inline UBool
01245 Unicode::isIdentifierIgnorable(UChar32 ch) {
01246 return u_isIDIgnorable(ch);
01247 }
01248
01249
01250 inline UChar32
01251 Unicode::toLowerCase(UChar32 ch) {
01252 return u_tolower(ch);
01253 }
01254
01255
01256 inline UChar32
01257 Unicode::toUpperCase(UChar32 ch) {
01258 return u_toupper(ch);
01259 }
01260
01261
01262 inline UChar32
01263 Unicode::toTitleCase(UChar32 ch) {
01264 return u_totitle(ch);
01265 }
01266
01267
01268 inline UChar32
01269 Unicode::foldCase(UChar32 ch, uint32_t options) {
01270 return u_foldCase(ch, options);
01271 }
01272
01273
01274 inline UBool
01275 Unicode::isSpaceChar(UChar32 ch) {
01276 return u_isspace(ch);
01277 }
01278
01279
01280 inline UBool
01281 Unicode::isWhitespace(UChar32 ch) {
01282 return u_isWhitespace(ch);
01283 }
01284
01285
01286 inline int8_t
01287 Unicode::getType(UChar32 ch) {
01288 return u_charType(ch);
01289 }
01290
01291 inline uint8_t
01292 Unicode::getCombiningClass(UChar32 c) {
01293 return u_getCombiningClass(c);
01294 }
01295
01296
01297 inline Unicode::EDirectionProperty
01298 Unicode::characterDirection(UChar32 ch) {
01299 return (EDirectionProperty)u_charDirection(ch);
01300 }
01301
01302
01303 inline UBool
01304 Unicode::isMirrored(UChar32 ch) {
01305 return u_isMirrored(ch);
01306 }
01307
01308
01309 inline UChar32
01310 Unicode::charMirror(UChar32 ch) {
01311 return u_charMirror(ch);
01312 }
01313
01314
01315 inline Unicode::EUnicodeScript
01316 Unicode::getScript(UChar32 ch) {
01317 return (EUnicodeScript) u_charScript(ch);
01318 }
01319
01320
01321 inline uint16_t
01322 Unicode::getCellWidth(UChar32 ch) {
01323 return u_charCellWidth(ch);
01324 }
01325
01326 inline int32_t
01327 Unicode::getCharName(uint32_t code,
01328 char *buffer, int32_t bufferLength,
01329 UCharNameChoice nameChoice) {
01330 UErrorCode errorCode=U_ZERO_ERROR;
01331 int32_t length=u_charName(code, nameChoice, buffer, bufferLength, &errorCode);
01332 return U_SUCCESS(errorCode) ? length : 0;
01333 }
01334
01335 inline int32_t
01336 Unicode::digitValue(UChar32 ch) {
01337 return u_charDigitValue(ch);
01338 }
01339
01340 inline int32_t
01341 Unicode::digit(UChar32 ch, int8_t radix) {
01342 return u_digit(ch, radix);
01343 }
01344
01345 inline UChar32
01346 Unicode::forDigit(int32_t digit, int8_t radix) {
01347 return u_forDigit(digit, radix);
01348 }
01349
01350 inline void
01351 Unicode::getUnicodeVersion(UVersionInfo versionArray) {
01352 u_getUnicodeVersion(versionArray);
01353 }
01354 U_NAMESPACE_END
01355
01356 #endif