00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00083 #ifndef __UTF_H__
00084 #define __UTF_H__
00085
00086
00087
00088
00089
00090 #include <stddef.h>
00091 #include "unicode/umachine.h"
00092
00093
00094
/*
 * UTF_SIZE selects the width, in bits, of one string code unit.
 * A build may predefine it; when it does not, 16 is used.
 */
#ifndef UTF_SIZE
#   define UTF_SIZE 16
#endif

/* Size of one code unit in bytes: the bit width divided by 8. */
#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00102
/*
 * Default U_HAVE_WCHAR_H to 1 unless the build configuration already set it.
 * NOTE(review): presumably signals that <wchar.h> is available on the
 * platform -- confirm against the platform configuration header.
 */
#ifndef U_HAVE_WCHAR_H
#   define U_HAVE_WCHAR_H 1
#endif
00110
00111
/*
 * Fall back to a 4-byte wchar_t when U_SIZEOF_WCHAR_T is 0.
 * An undefined macro also evaluates to 0 inside #if, so this covers both
 * "not defined at all" and "explicitly defined as 0".
 */
#if U_SIZEOF_WCHAR_T==0
#   undef U_SIZEOF_WCHAR_T
#   define U_SIZEOF_WCHAR_T 4
#endif
00117
/*
 * UChar32 is the 32-bit type used for single code point values.
 * It aliases wchar_t when U_SIZEOF_WCHAR_T says that type is four bytes
 * wide, and uint32_t in every other case.
 */
#if U_SIZEOF_WCHAR_T==4
typedef wchar_t UChar32;
#else
typedef uint32_t UChar32;
#endif
00128
/*
 * Signed 32-bit integer type.
 * NOTE(review): presumably used for indexes/offsets into text -- confirm
 * against callers.
 */
typedef int32_t UTextOffset;
00134
00135
/*
 * Pick a default checking mode: when the build selects none of UTF_SAFE,
 * UTF_STRICT or UTF_UNSAFE, define UTF_SAFE. The mode decides which family
 * of implementation macros the mode-neutral UTF_ names in this header map to.
 */
#if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
#   define UTF_SAFE
#endif
00143
00144
00145
/*
 * Special values that UTF_IS_ERROR() and UTF_IS_VALID() treat as error
 * markers rather than real characters.
 * NOTE(review): presumably the results produced for malformed UTF-8
 * sequences of different lengths -- confirm against the UTF-8 macros.
 */
#define UTF8_ERROR_VALUE_1 0x15
#define UTF8_ERROR_VALUE_2 0x9f

/* General error marker; also matched by the 0xfffe/0xffff test in UTF_IS_ERROR(). */
#define UTF_ERROR_VALUE 0xffff

/*
 * The classification macros below may evaluate their argument more than
 * once; do not pass expressions with side effects.
 */

/*
 * True when uchar lies in 0xd800..0xdfff: clearing the low 11 bits must
 * leave exactly 0xd800.
 */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/*
 * True when c is in 0..0x10ffff, is not a surrogate, and its low 16 bits
 * are neither 0xfffe nor 0xffff. The uint32_t cast makes negative inputs
 * fail the range test.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<=0x10ffff && \
     !UTF_IS_SURROGATE(c) && ((c)&0xfffe)!=0xfffe)

/*
 * True when c is one of the error markers: low 16 bits equal to 0xfffe or
 * 0xffff, or one of the two UTF8_ERROR_VALUE_* constants.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/*
 * True when c passes UTF_IS_UNICODE_CHAR() and is additionally neither of
 * the UTF8_ERROR_VALUE_* markers.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00193
00194
00195
00196 #include "unicode/utf8.h"
00197 #include "unicode/utf16.h"
00198 #include "unicode/utf32.h"
00199
00200
00201
00208 #if UTF_SIZE==8
00209
00210 # error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
00211
00212
00213
00214
00215
00216 # include <limits.h>
00217
00218
00219 # if CHAR_MAX>=255
00220 typedef char UChar;
00221 # else
00222 typedef uint8_t UChar;
00223 # endif
00224
00225 #elif UTF_SIZE==16
00226
00227
00228 # if U_SIZEOF_WCHAR_T==2
00229 typedef wchar_t UChar;
00230 # else
00231 typedef uint16_t UChar;
00232 # endif
00233
00235 # define UTF_IS_SINGLE(uchar) UTF16_IS_SINGLE(uchar)
00236
00237 # define UTF_IS_LEAD(uchar) UTF16_IS_LEAD(uchar)
00238
00239 # define UTF_IS_TRAIL(uchar) UTF16_IS_TRAIL(uchar)
00240
00242 # define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
00243
00244 # define UTF_CHAR_LENGTH(c) UTF16_CHAR_LENGTH(c)
00245
00246 # define UTF_MAX_CHAR_LENGTH UTF16_MAX_CHAR_LENGTH
00247
00248 # define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
00249
00251 # define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
00252
00253 # define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00254
00256 # define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00257
00258 # define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00259
00261 # define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00262
00263 # define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00264
00266 # define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
00267
00268 # define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
00269
00271 # define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
00272
00273 # define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
00274
00276 # define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
00277
00278 # define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
00279
00281 # define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
00282
00283 # define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00284
00286 # define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
00287
00288 # define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
00289
00291 # define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
00292
00293 # define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
00294
00296 # define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00297
00298 # define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00299
00300 #elif UTF_SIZE==32
00301
00302 # error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
00303
00304 typedef UChar32 UChar;
00305
00306 #else
00307 # error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
00308 #endif
00309
00310
00311
/*
 * Mode-neutral macro names. Each one forwards to the _SAFE or _UNSAFE
 * implementation according to the checking mode chosen at build time:
 *
 *   UTF_SAFE    -> *_SAFE macros, strict argument FALSE
 *   UTF_STRICT  -> *_SAFE macros, strict argument TRUE
 *   otherwise   -> *_UNSAFE macros; the start/length arguments are accepted
 *                  for source compatibility but dropped from the expansion
 *
 * UTF_SAFE is tested first, so it wins when both it and UTF_STRICT are
 * defined. Only the macros that take a strict argument differ between the
 * first two modes.
 */
#ifdef UTF_SAFE

#   define UTF_GET_CHAR(s, start, i, length, c)         UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)

#   define UTF_NEXT_CHAR(s, i, length, c)               UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
#   define UTF_APPEND_CHAR(s, i, length, c)             UTF_APPEND_CHAR_SAFE(s, i, length, c)
#   define UTF_FWD_1(s, i, length)                      UTF_FWD_1_SAFE(s, i, length)
#   define UTF_FWD_N(s, i, length, n)                   UTF_FWD_N_SAFE(s, i, length, n)
#   define UTF_SET_CHAR_START(s, start, i)              UTF_SET_CHAR_START_SAFE(s, start, i)

#   define UTF_PREV_CHAR(s, start, i, c)                UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
#   define UTF_BACK_1(s, start, i)                      UTF_BACK_1_SAFE(s, start, i)
#   define UTF_BACK_N(s, start, i, n)                   UTF_BACK_N_SAFE(s, start, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length)      UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#elif defined(UTF_STRICT)

#   define UTF_GET_CHAR(s, start, i, length, c)         UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)

#   define UTF_NEXT_CHAR(s, i, length, c)               UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
#   define UTF_APPEND_CHAR(s, i, length, c)             UTF_APPEND_CHAR_SAFE(s, i, length, c)
#   define UTF_FWD_1(s, i, length)                      UTF_FWD_1_SAFE(s, i, length)
#   define UTF_FWD_N(s, i, length, n)                   UTF_FWD_N_SAFE(s, i, length, n)
#   define UTF_SET_CHAR_START(s, start, i)              UTF_SET_CHAR_START_SAFE(s, start, i)

#   define UTF_PREV_CHAR(s, start, i, c)                UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
#   define UTF_BACK_1(s, start, i)                      UTF_BACK_1_SAFE(s, start, i)
#   define UTF_BACK_N(s, start, i, n)                   UTF_BACK_N_SAFE(s, start, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length)      UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#else

#   define UTF_GET_CHAR(s, start, i, length, c)         UTF_GET_CHAR_UNSAFE(s, i, c)

#   define UTF_NEXT_CHAR(s, i, length, c)               UTF_NEXT_CHAR_UNSAFE(s, i, c)
#   define UTF_APPEND_CHAR(s, i, length, c)             UTF_APPEND_CHAR_UNSAFE(s, i, c)
#   define UTF_FWD_1(s, i, length)                      UTF_FWD_1_UNSAFE(s, i)
#   define UTF_FWD_N(s, i, length, n)                   UTF_FWD_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_START(s, start, i)              UTF_SET_CHAR_START_UNSAFE(s, i)

#   define UTF_PREV_CHAR(s, start, i, c)                UTF_PREV_CHAR_UNSAFE(s, i, c)
#   define UTF_BACK_1(s, start, i)                      UTF_BACK_1_UNSAFE(s, i)
#   define UTF_BACK_N(s, start, i, n)                   UTF_BACK_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length)      UTF_SET_CHAR_LIMIT_UNSAFE(s, i)

#endif
00465
00466 #endif