00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00032 #ifndef __UTF16_H__
00033 #define __UTF16_H__
00034
00035
00036
00037
/**
 * Surrogate classification of a single value.
 *
 * The mask 0xfffffc00 clears the low 10 bits of the argument, so each test
 * matches a run of 1024 consecutive values:
 *   - UTF_IS_FIRST_SURROGATE:  0xd800..0xdbff
 *   - UTF_IS_SECOND_SURROGATE: 0xdc00..0xdfff
 *
 * @param uchar value to classify; evaluated exactly once.
 * @return non-zero if the value lies in the respective range, otherwise 0.
 */
#define UTF_IS_FIRST_SURROGATE(uchar)  (0xd800 == ((uchar) & 0xfffffc00))
#define UTF_IS_SECOND_SURROGATE(uchar) (0xdc00 == ((uchar) & 0xfffffc00))
00040
/**
 * Tell a first surrogate from a second one by looking at bit 0x400 only.
 *
 * The macro inspects nothing but that single bit, so the answer is only
 * meaningful for a value already known to be a surrogate; the macros in
 * this file call it only after a UTF_IS_SURROGATE() check has succeeded.
 *
 * @param c value to inspect; evaluated exactly once.
 * @return 1 if bit 0x400 is clear (first surrogate), otherwise 0.
 */
#define UTF_IS_SURROGATE_FIRST(c) (!((c) & 0x400))
00042
00043
/**
 * Constant subtracted when a surrogate pair is combined arithmetically.
 *
 * It is the value that (first<<10)+second takes for the smallest pair
 * (0xd800, 0xdc00), minus 0x10000, so that this pair maps to 0x10000.
 */
#define UTF_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)

/**
 * Combine two surrogate code units into one code point value.
 *
 * No validation is done: the arguments are used as given.
 *
 * @param first  first (lead) surrogate; evaluated once.
 * @param second second (trail) surrogate; evaluated once.
 * @return (first<<10) + second - UTF_SURROGATE_OFFSET.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    ((((first) << 10UL) + (second)) - UTF_SURROGATE_OFFSET)
00048
00049
/**
 * UTF-16 flavoured names for the code-unit classification tests.
 *
 *   - UTF16_IS_SINGLE: the unit is not a surrogate, as decided by
 *     UTF_IS_SURROGATE() (which is not defined in this header).
 *   - UTF16_IS_LEAD:   same test as UTF_IS_FIRST_SURROGATE().
 *   - UTF16_IS_TRAIL:  same test as UTF_IS_SECOND_SURROGATE().
 *
 * @param uchar code unit to classify.
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))
#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
#define UTF16_IS_TRAIL(uchar)  UTF_IS_SECOND_SURROGATE(uchar)
00053
00054
/**
 * Does the code point need more than one 16-bit code unit?
 *
 * The argument is converted to uint32_t first, so negative values compare
 * as large unsigned numbers and therefore yield 1.
 *
 * @param c code point; evaluated once.
 * @return 1 if (uint32_t)c is above 0xffff, otherwise 0.
 */
#define UTF16_NEED_MULTIPLE_UCHAR(c) (0xffff < (uint32_t)(c))

/**
 * Number of 16-bit code units used for the code point.
 *
 * @param c code point; evaluated once.
 * @return 1 when (uint32_t)c is at most 0xffff, otherwise 2. No range
 *         check against 0x10ffff is made.
 */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c) > 0xffff ? 2 : 1)

/** Largest value UTF16_CHAR_LENGTH() can produce. */
#define UTF16_MAX_CHAR_LENGTH 2
00058
00059
/**
 * Expands to its argument unchanged (wrapped in parentheses).
 *
 * NOTE(review): presumably a buffer-size conversion hook that is the
 * identity for UTF-16 -- confirm against the callers.
 *
 * @param size size expression; evaluated once.
 */
#define UTF16_ARRAY_SIZE(size) (size)
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
/**
 * Read the code point that contains the code unit at index i, without
 * moving i.
 *
 * No bounds or pairing checks are made: when s[i] is a surrogate, the
 * neighbouring unit (s[i+1] for a first surrogate, s[i-1] for a second one)
 * is read and combined unconditionally.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index of any code unit of the wanted code point (not modified).
 * @param c lvalue receiving the code point.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(!(UTF_IS_SURROGATE(c))) { \
        /* single code unit: c already holds the result */ \
    } else if(UTF_IS_SURROGATE_FIRST(c)) { \
        /* i is on the first half, the partner follows */ \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
    } else { \
        /* i is on the second half, the partner precedes */ \
        (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
    } \
}
00082
/**
 * Read the code point that contains the code unit at index i, without
 * moving i, staying inside [start, length).
 *
 * If s[i] is a surrogate, its partner is looked up on the appropriate side
 * (s[i+1] for a first surrogate, s[i-1] for a second one), but only when
 * that index is inside the bounds and the unit there is the matching kind
 * of surrogate. For an unpaired surrogate, c becomes UTF_ERROR_VALUE when
 * strict is non-zero, and keeps the lone surrogate value otherwise.
 *
 * The backward bound is tested as (i)>(start) -- the same form the other
 * *_SAFE macros in this file use -- so that an unsigned index equal to 0
 * cannot wrap around and cause a read before the start of the array.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param start  lowest index that may be read.
 * @param i      index of any code unit of the wanted code point
 *               (not modified); must satisfy start<=i<length.
 * @param length one past the highest index that may be read.
 * @param c      lvalue receiving the code point.
 * @param strict non-zero to report unpaired surrogates as UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t utf16_c2_; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(utf16_c2_=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), utf16_c2_); \
            } else if(strict) { \
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)>(start) && UTF_IS_FIRST_SURROGATE(utf16_c2_=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(utf16_c2_, (c)); \
            } else if(strict) { \
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } \
}
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
/**
 * Read the code point starting at index i and advance i past it.
 *
 * No bounds or pairing checks are made: if the unit read is a first
 * surrogate, the following unit is consumed and combined unconditionally.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue; incremented by 1 or 2.
 * @param c lvalue receiving the code point.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        /* take the second half as well and step over it */ \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
00129
/**
 * Write code point c at index i and advance i past what was written.
 *
 * Values up to 0xffff (after conversion to uint32_t) are stored as one
 * code unit; anything larger is stored as two units computed as
 * (c>>10)+0xd7c0 and (c&0x3ff)|0xdc00. Neither the buffer capacity nor the
 * upper limit of c is checked.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s destination array of 16-bit code units.
 * @param i index lvalue; incremented by 1 or 2.
 * @param c code point to store (evaluated more than once).
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)>0xffff) { \
        /* two code units: first half, then second half */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
}
00138
/**
 * Advance i past the code point that starts at index i.
 *
 * No bounds or pairing checks are made: i moves by 2 whenever the unit at
 * the old position is a first surrogate, by 1 otherwise.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to advance.
 */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        /* step over the second half too */ \
        ++(i); \
    } \
}
00144
/**
 * Advance i by n code points by applying UTF16_FWD_1_UNSAFE n times.
 *
 * n is copied once into a local UTextOffset counter (a type declared
 * outside this header); a value of zero or less leaves i untouched.
 * No bounds checks are made.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to advance.
 * @param n number of code points to skip; evaluated once.
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    UTextOffset utf16_left_=(n); \
    for(; utf16_left_>0; --utf16_left_) { \
        UTF16_FWD_1_UNSAFE(s, i); \
    } \
}
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
/**
 * Move i back to the start of the code point it points into.
 *
 * If the unit at i is a second surrogate, i is decremented by one;
 * otherwise it is left alone. Neither the lower bound nor the unit before
 * i is examined.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to adjust.
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        /* i sits on the second half: back up to the first */ \
        --(i); \
    } \
}
00167
00168
00169
/**
 * Read the code point starting at index i and advance i past it, never
 * reading at or beyond index length.
 *
 * A first surrogate is combined with the following unit only if that unit
 * is inside the bounds and is a second surrogate; i then moves by 2.
 * In every other case i moves by 1. Unpaired surrogates (a first surrogate
 * without partner, or a second surrogate on its own) yield UTF_ERROR_VALUE
 * when strict is non-zero, and their own value otherwise.
 *
 * The strict argument is parenthesised wherever it is combined with another
 * operand, so that expressions such as `a || b` or `f ? 1 : 0` can be
 * passed without changing the meaning of the test.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param i      index lvalue; must be below length on entry.
 * @param length one past the highest index that may be read.
 * @param c      lvalue receiving the code point.
 * @param strict non-zero to report unpaired surrogates as UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t utf16_c2_; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(utf16_c2_=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), utf16_c2_); \
        } else if(strict) { \
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && UTF_IS_SECOND_SURROGATE(c)) { \
        /* unmatched second surrogate */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00188
/**
 * Write code point c at index i and advance i past what was written.
 *
 *   - (uint32_t)c <= 0xffff: one code unit is stored. The capacity is not
 *     checked on this path.
 *   - c <= 0x10ffff and two units fit ((i)+1 < length): the two units
 *     (c>>10)+0xd7c0 and (c&0x3ff)|0xdc00 are stored.
 *   - otherwise (c too large, or only one slot left): a single
 *     UTF_ERROR_VALUE unit is stored.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      destination array of 16-bit code units.
 * @param i      index lvalue; incremented by 1 or 2.
 * @param length capacity used for the two-unit check.
 * @param c      code point to store (evaluated more than once).
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)>0x10ffff || (i)+1>=(length)) { \
        /* out of range, or no room for a pair */ \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}
00203
/**
 * Advance i past the code point that starts at index i, never reading at
 * or beyond index length.
 *
 * i always moves by 1; it moves by a second step only when the unit just
 * passed was a first surrogate and the next unit is inside the bounds and
 * is a second surrogate.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param i      index lvalue to advance.
 * @param length one past the highest index that may be read.
 */
#define UTF16_FWD_1_SAFE(s, i, length) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
            ++(i); \
        } \
    } \
}
00209
/**
 * Advance i by up to n code points using UTF16_FWD_1_SAFE, stopping early
 * once i reaches length.
 *
 * n is copied once into a local UTextOffset counter (a type declared
 * outside this header); a value of zero or less leaves i untouched.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param i      index lvalue to advance.
 * @param length one past the highest index that may be read.
 * @param n      number of code points to skip; evaluated once.
 */
#define UTF16_FWD_N_SAFE(s, i, length, n) { \
    UTextOffset utf16_left_=(n); \
    for(; utf16_left_>0 && (i)<(length); --utf16_left_) { \
        UTF16_FWD_1_SAFE(s, i, length); \
    } \
}
00217
/**
 * Move i back to the start of the code point it points into, never going
 * below start.
 *
 * i is decremented by one only if the unit at i is a second surrogate,
 * i is above start, and the unit before it is a first surrogate.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s     array of 16-bit code units.
 * @param start lowest index that may be read.
 * @param i     index lvalue to adjust.
 */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
            --(i); \
        } \
    } \
}
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
/**
 * Step i back over the code point that ends just before index i and read
 * that code point.
 *
 * No bounds or pairing checks are made: if the unit read is a second
 * surrogate, the unit before it is consumed and combined unconditionally.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue; decremented by 1 or 2.
 * @param c lvalue receiving the code point.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        /* take the first half as well and step onto it */ \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}
00246
/**
 * Step i back over the code point that ends just before index i.
 *
 * No bounds or pairing checks are made: i moves by 2 whenever the unit at
 * the new position is a second surrogate, by 1 otherwise.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to move back.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        /* step onto the first half */ \
        --(i); \
    } \
}
00252
/**
 * Move i back by n code points by applying UTF16_BACK_1_UNSAFE n times.
 *
 * n is copied once into a local UTextOffset counter (a type declared
 * outside this header); a value of zero or less leaves i untouched.
 * No bounds checks are made.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to move back.
 * @param n number of code points to go back; evaluated once.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    UTextOffset utf16_left_=(n); \
    for(; utf16_left_>0; --utf16_left_) { \
        UTF16_BACK_1_UNSAFE(s, i); \
    } \
}
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
/**
 * Move i forward so that it does not split a surrogate pair.
 *
 * If the unit just before i is a first surrogate, i is incremented by
 * one; otherwise it is left alone. Neither the bounds nor the unit at i
 * are examined.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s array of 16-bit code units.
 * @param i index lvalue to adjust; s[i-1] is read.
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        /* i would cut a pair in two: include the second half */ \
        ++(i); \
    } \
}
00275
00276
00277
/**
 * Step i back over the code point that ends just before index i and read
 * that code point, never reading below index start.
 *
 * A second surrogate is combined with the preceding unit only if that unit
 * is inside the bounds and is a first surrogate; i then moves by 2.
 * In every other case i moves by 1. Unpaired surrogates (a second surrogate
 * without partner, or a first surrogate on its own) yield UTF_ERROR_VALUE
 * when strict is non-zero, and their own value otherwise.
 *
 * The strict argument is parenthesised wherever it is combined with another
 * operand, so that expressions such as `a || b` or `f ? 1 : 0` can be
 * passed without changing the meaning of the test.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param start  lowest index that may be read.
 * @param i      index lvalue; must be above start on entry.
 * @param c      lvalue receiving the code point.
 * @param strict non-zero to report unpaired surrogates as UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t utf16_c2_; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(utf16_c2_=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(utf16_c2_, (c)); \
        } else if(strict) { \
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && UTF_IS_FIRST_SURROGATE(c)) { \
        /* unmatched first surrogate */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00296
/**
 * Step i back over the code point that ends just before index i, never
 * reading below index start.
 *
 * i always moves back by 1; it moves a second step only when the unit now
 * at i is a second surrogate, i is still above start, and the unit before
 * it is a first surrogate.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s     array of 16-bit code units.
 * @param start lowest index that may be read.
 * @param i     index lvalue to move back.
 */
#define UTF16_BACK_1_SAFE(s, start, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
            --(i); \
        } \
    } \
}
00302
/**
 * Move i back by up to n code points using UTF16_BACK_1_SAFE, stopping
 * early once i is no longer above start.
 *
 * n is copied once into a local UTextOffset counter (a type declared
 * outside this header); a value of zero or less leaves i untouched.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s     array of 16-bit code units.
 * @param start lowest index that may be read.
 * @param i     index lvalue to move back.
 * @param n     number of code points to go back; evaluated once.
 */
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
    UTextOffset utf16_left_=(n); \
    for(; utf16_left_>0 && (i)>(start); --utf16_left_) { \
        UTF16_BACK_1_SAFE(s, start, i); \
    } \
}
00310
/**
 * Move i forward so that it does not split a surrogate pair, staying
 * inside the given bounds.
 *
 * i is incremented by one only when start < i < length, the unit before i
 * is a first surrogate and the unit at i is a second surrogate. In every
 * other case i is left unchanged and no array element outside
 * [start, length) is read.
 *
 * Expands to a brace-enclosed block, not an expression.
 *
 * @param s      array of 16-bit code units.
 * @param start  lowest index that may be read.
 * @param i      index lvalue to adjust.
 * @param length one past the highest index that may be read.
 */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
    if((start)<(i) && (i)<(length)) { \
        if(UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
            ++(i); \
        } \
    } \
}
00316
00317 #endif