00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00087 #ifndef __UTF_H__
00088 #define __UTF_H__
00089
00090
00091
00092
00093
00094 #include "unicode/umachine.h"
00095 #include <stddef.h>
00096
00097
00098
/**
 * Number of bits in the default code unit type.
 * May be predefined by the includer; when it is not, 16 is used.
 */
#ifndef UTF_SIZE
#   define UTF_SIZE 16
#endif

/**
 * Number of bytes in one code unit: UTF_SIZE divided by 8.
 * UTF_SIZE is parenthesized so that a predefined expression value is
 * shifted as a whole.
 */
#define U_SIZEOF_UCHAR ((UTF_SIZE)>>3)
00106
/**
 * Defaults to 1 unless the includer (or the platform configuration) has
 * already defined it.
 * NOTE(review): the name suggests "the platform provides <wchar.h>";
 * nothing in this header reads the value, so confirm against its users.
 */
#ifndef U_HAVE_WCHAR_H
#   define U_HAVE_WCHAR_H 1
#endif
00114
00115
/**
 * Fallback for U_SIZEOF_WCHAR_T: when the macro is missing or was set to 0,
 * assume 4.
 * The "not defined" case is spelled out instead of relying on an undefined
 * identifier evaluating to 0 inside #if, which is legal C but trips -Wundef.
 */
#if !defined(U_SIZEOF_WCHAR_T) || U_SIZEOF_WCHAR_T==0
#   undef U_SIZEOF_WCHAR_T
#   define U_SIZEOF_WCHAR_T 4
#endif
00120
/**
 * Decide whether wchar_t strings can be treated as UTF-16 or UTF-32 and
 * define U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32 accordingly.
 *
 * Nothing is done when either macro is already defined. Otherwise the first
 * matching platform indicator is used, and within it the wchar_t size picks
 * the encoding:
 *   - __STDC_ISO_10646__ : size 2 -> UTF-16, size 4 -> UTF-32
 *   - __UCS2__           : UTF-16, only on OS/390 or OS/400 with size 2
 *   - __UCS4__           : UTF-32, only with size 4
 *   - Windows            : UTF-16 regardless of U_SIZEOF_WCHAR_T
 * If no rule applies, neither macro is defined.
 */
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
#   ifdef __STDC_ISO_10646__
#       if (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       elif (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined __UCS2__
#       if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       endif
#   elif defined __UCS4__
#       if (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#       define U_WCHAR_IS_UTF16
#   endif
#endif
00148
/**
 * 32-bit type for a single code point.
 * wchar_t is reused when it is 4 bytes wide; otherwise uint32_t is used.
 * Note that the signedness of wchar_t is implementation-defined, so UChar32
 * may be signed on one platform and unsigned on another; the classification
 * macros in this header cast to uint32_t before range comparisons.
 */
#if U_SIZEOF_WCHAR_T==4
    typedef wchar_t UChar32;
#else
    typedef uint32_t UChar32;
#endif
00159
/** Signed 32-bit type; by its name, used for offsets (indexes) into text. */
typedef int32_t UTextOffset;
00168
00169
/**
 * Default safety mode: when the includer selected none of UTF_SAFE,
 * UTF_STRICT or UTF_UNSAFE, UTF_SAFE is chosen. The mode decides which
 * implementation family the generic UTF_ macros at the end of this header
 * map to.
 */
#if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
#   define UTF_SAFE
#endif
00177
00178
00179
/*
 * Error values and code point classification.
 *
 * All function-like macros below evaluate their argument more than once;
 * do not pass expressions with side effects (e.g. s[i++]).
 */

/**
 * First of two special values that UTF_IS_ERROR() reports as an error and
 * UTF_IS_VALID() rejects.
 * NOTE(review): by name, produced by the UTF-8 macros for bad input -
 * confirm against unicode/utf8.h.
 */
#define UTF8_ERROR_VALUE_1 0x15

/**
 * Second special value treated like UTF8_ERROR_VALUE_1 by UTF_IS_ERROR()
 * and UTF_IS_VALID().
 */
#define UTF8_ERROR_VALUE_2 0x9f

/**
 * Generic error value; its low 16 bits are 0xffff, so UTF_IS_ERROR()
 * reports it as an error.
 */
#define UTF_ERROR_VALUE 0xffff

/** True if the value lies in the surrogate range 0xd800..0xdfff. */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/**
 * True if c is a Unicode noncharacter: 0xfdd0..0xfdef, or any value up to
 * 0x10ffff whose low 16 bits are 0xfffe or 0xffff.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * True if c is in 0..0x10ffff, is not a surrogate (0xd800..0xdfff) and is
 * not a noncharacter as defined by UTF_IS_UNICODE_NONCHAR().
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))

/**
 * True if c is one of the error values: low 16 bits equal to 0xfffe or
 * 0xffff (no upper range check), or equal to UTF8_ERROR_VALUE_1 or
 * UTF8_ERROR_VALUE_2.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/**
 * True if c passes UTF_IS_UNICODE_CHAR() and is neither of the two
 * UTF8_ERROR_VALUE_ constants.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00247
00248
00249
00250 #include "unicode/utf8.h"
00251 #include "unicode/utf16.h"
00252 #include "unicode/utf32.h"
00253
00254
00255
00262 #if UTF_SIZE==8
00263
00264 # error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
00265
00266
00267
00268
00269
00270 # include <limits.h>
00271
00272
00273 # if CHAR_MAX>=255
00274 typedef char UChar;
00275 # else
00276 typedef uint8_t UChar;
00277 # endif
00278
00279 #elif UTF_SIZE==16
00280
00281
00282 # if U_SIZEOF_WCHAR_T==2
00283 typedef wchar_t UChar;
00284 # else
00285 typedef uint16_t UChar;
00286 # endif
00287
00289 # define UTF_IS_SINGLE(uchar) UTF16_IS_SINGLE(uchar)
00290
00291 # define UTF_IS_LEAD(uchar) UTF16_IS_LEAD(uchar)
00292
00293 # define UTF_IS_TRAIL(uchar) UTF16_IS_TRAIL(uchar)
00294
00296 # define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
00297
00298 # define UTF_CHAR_LENGTH(c) UTF16_CHAR_LENGTH(c)
00299
00300 # define UTF_MAX_CHAR_LENGTH UTF16_MAX_CHAR_LENGTH
00301
00302 # define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
00303
00305 # define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
00306
00307 # define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00308
00310 # define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00311
00312 # define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00313
00315 # define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00316
00317 # define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00318
00320 # define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
00321
00322 # define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
00323
00325 # define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
00326
00327 # define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
00328
00330 # define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
00331
00332 # define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
00333
00335 # define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
00336
00337 # define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00338
00340 # define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
00341
00342 # define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
00343
00345 # define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
00346
00347 # define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
00348
00350 # define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00351
00352 # define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00353
00354 #elif UTF_SIZE==32
00355
00356 # error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
00357
00358 typedef UChar32 UChar;
00359
00360 #else
00361 # error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
00362 #endif
00363
00364
00365
/**
 * Default UTF_ macros without a _SAFE/_UNSAFE suffix.
 *
 * They all take the full "safe" parameter list and forward to the
 * implementation family chosen by the safety mode:
 *   - UTF_SAFE   : _SAFE macros, strict flag FALSE
 *   - UTF_STRICT : _SAFE macros, strict flag TRUE
 *   - otherwise  : _UNSAFE macros; the start/length arguments are dropped
 * UTF_SAFE takes precedence when both UTF_SAFE and UTF_STRICT are defined.
 *
 * Only the three macros that read a code point (GET, NEXT, PREV) take a
 * strict flag, so they are the only ones that differ between safe and
 * strict mode; the remaining seven are shared.
 */
#if defined(UTF_SAFE) || defined(UTF_STRICT)

#   ifdef UTF_SAFE
#       define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
#       define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
#       define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
#   else
#       define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
#       define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
#       define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
#   endif

#   define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
#   define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
#   define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
#   define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)

#   define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
#   define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#else

#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)

#   define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_UNSAFE(s, i, c)
#   define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_UNSAFE(s, i, c)
#   define UTF_FWD_1(s, i, length) UTF_FWD_1_UNSAFE(s, i)
#   define UTF_FWD_N(s, i, length, n) UTF_FWD_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_UNSAFE(s, i)

#   define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c)
#   define UTF_BACK_1(s, start, i) UTF_BACK_1_UNSAFE(s, i)
#   define UTF_BACK_N(s, start, i, n) UTF_BACK_N_UNSAFE(s, i, n)
#   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)

#endif
00519
00520 #endif