Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

utf.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999-2001, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 1999sep09
00014 *   created by: Markus W. Scherer
00015 */
00016 
/* Include guard: everything up to the final #endif is processed at most once per translation unit. */
00087 #ifndef __UTF_H__
00088 #define __UTF_H__
00089 
00090 /*
00091  * ANSI C headers:
00092  * stddef.h defines wchar_t
00093  */
00094 #include "unicode/umachine.h"
00095 #include <stddef.h>
00096 /* include the utfXX.h after the following definitions */
00097 
00098 /* If there is no compiler option for the preferred UTF size, then default to UTF-16. */
00099 #ifndef UTF_SIZE
00100 
/* Code-unit width in bits. The UTF_SIZE selection later in this file raises #error for every value except 16. */
00101 #   define UTF_SIZE 16
00102 #endif
00103 
/* Size of one code unit in bytes: UTF_SIZE is a bit count, and >>3 divides it by 8. */
00105 #define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00106 
/*
 * Default U_HAVE_WCHAR_H to 1 unless it was defined before this point.
 * NOTE(review): by its name this records that <wchar.h> is available - confirm
 * against the platform configuration; nothing in this file tests the value.
 */
00111 #ifndef U_HAVE_WCHAR_H
00112 #   define U_HAVE_WCHAR_H 1
00113 #endif
00114 
00115 /* U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) */
/*
 * An undefined identifier evaluates to 0 inside #if, so this one test covers
 * both "not defined" and "defined as 0". In either case the size falls back to 4.
 */
00116 #if U_SIZEOF_WCHAR_T==0
00117 #   undef U_SIZEOF_WCHAR_T
00118 #   define U_SIZEOF_WCHAR_T 4
00119 #endif
00120 
/*
 * Decide whether wchar_t strings are UTF-16 or UTF-32, unless the build has
 * already defined U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32.
 * Only the first matching branch is considered:
 *   __STDC_ISO_10646__          UTF-16 for a 2-byte wchar_t, UTF-32 for a 4-byte one
 *   __UCS2__                    UTF-16, but only with (__OS390__ || __OS400__)
 *                               and a 2-byte wchar_t
 *   __UCS4__                    UTF-32, but only for a 4-byte wchar_t
 *   WIN32/_WIN32/WIN64/_WIN64   UTF-16, without checking U_SIZEOF_WCHAR_T
 * If a branch is entered but its inner test fails, or no branch matches,
 * neither macro is defined.
 */
00129 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
00130 #   ifdef __STDC_ISO_10646__ 
00131 #       if (U_SIZEOF_WCHAR_T==2)
00132 #           define U_WCHAR_IS_UTF16
00133 #       elif (U_SIZEOF_WCHAR_T==4)
00134 #           define  U_WCHAR_IS_UTF32
00135 #       endif
00136 #   elif defined __UCS2__
/* __OS390__ and __OS400__ are used by value here; when undefined they evaluate to 0. */
00137 #       if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
00138 #           define U_WCHAR_IS_UTF16
00139 #       endif
00140 #   elif defined __UCS4__
00141 #       if (U_SIZEOF_WCHAR_T==4)
00142 #           define U_WCHAR_IS_UTF32
00143 #       endif
00144 #   elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
00145 #       define U_WCHAR_IS_UTF16    
00146 #   endif
00147 #endif
00148 
/*
 * UChar32 is wchar_t when wchar_t is 4 bytes wide, and uint32_t otherwise.
 * The signedness of wchar_t is implementation-defined, so UChar32 may be
 * signed on some platforms; the range tests in the macros below cast to
 * uint32_t before comparing.
 */
00154 #if U_SIZEOF_WCHAR_T==4
00155     typedef wchar_t UChar32;
00156 #else
00157     typedef uint32_t UChar32;
00158 #endif
00159 
/*
 * Signed 32-bit integer type.
 * NOTE(review): by its name, intended for indexes/offsets into text - confirm against callers.
 */
00167 typedef int32_t UTextOffset;
00168 
00169 /* Specify which macro versions are the default ones - safe, strict, or unsafe (fast). */
00170 #if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
00171 
/* The build selected none of UTF_SAFE, UTF_STRICT, UTF_UNSAFE: default to UTF_SAFE. */
00175 #   define UTF_SAFE
00176 #endif
00177 
00178 /* internal definitions ----------------------------------------------------- */
00179 
/*
 * Value 0x15. UTF_IS_ERROR() reports it and UTF_IS_VALID() rejects it.
 * NOTE(review): by its name, an error result of the UTF-8 macros - confirm in unicode/utf8.h.
 */
00192 #define UTF8_ERROR_VALUE_1 0x15
00193 
/* Value 0x9f. Treated by UTF_IS_ERROR() and UTF_IS_VALID() exactly like UTF8_ERROR_VALUE_1. */
00196 #define UTF8_ERROR_VALUE_2 0x9f
00197 
/*
 * Value 0xffff. UTF_IS_ERROR() does not name this constant, but its
 * ((c)&0xfffe)==0xfffe test matches it.
 */
00202 #define UTF_ERROR_VALUE 0xffff
00203 
00204 /* single-code point definitions -------------------------------------------- */
00205 
/*
 * Nonzero if uchar is in the range 0xd800..0xdfff: the low 11 bits are masked
 * off and bits 11..31 must equal 0xd800. The argument is evaluated once.
 */
00207 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00208 
/*
 * Nonzero if c is at most 0x10ffff and either
 *   - lies in 0xfdd0..0xfdef, or
 *   - is at least 0xfdd0 and has low 16 bits equal to 0xfffe or 0xffff
 *     (0xfffe, 0xffff, 0x1fffe, 0x1ffff, ... 0x10ffff).
 * c is evaluated several times, so pass an expression without side effects.
 */
00212 #define UTF_IS_UNICODE_NONCHAR(c) \
00213     ((c)>=0xfdd0 && \
00214      ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
00215      (uint32_t)(c)<=0x10ffff)
00216 
/*
 * Nonzero if c is in 0..0x10ffff, is outside the surrogate range
 * 0xd800..0xdfff, and is not flagged by UTF_IS_UNICODE_NONCHAR().
 * The uint32_t casts make a negative c compare as a large value, so it fails
 * the upper-bound test. c is evaluated several times.
 */
00230 #define UTF_IS_UNICODE_CHAR(c) \
00231     ((uint32_t)(c)<0xd800 || \
00232         ((uint32_t)(c)>0xdfff && \
00233          (uint32_t)(c)<=0x10ffff && \
00234          !UTF_IS_UNICODE_NONCHAR(c)))
00235 
/*
 * Nonzero if the low 16 bits of c are 0xfffe or 0xffff (this includes
 * UTF_ERROR_VALUE), or if c equals UTF8_ERROR_VALUE_1 or UTF8_ERROR_VALUE_2.
 * There is no upper-bound check, so e.g. 0x1fffe matches as well.
 * c is evaluated several times.
 */
00240 #define UTF_IS_ERROR(c) \
00241     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00242 
/*
 * Nonzero if c satisfies UTF_IS_UNICODE_CHAR() and is neither
 * UTF8_ERROR_VALUE_1 (0x15) nor UTF8_ERROR_VALUE_2 (0x9f).
 * c is evaluated several times.
 */
00244 #define UTF_IS_VALID(c) \
00245     (UTF_IS_UNICODE_CHAR(c) && \
00246      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00247 
00248 /* include the utfXX.h ------------------------------------------------------ */
00249 
00250 #include "unicode/utf8.h"
00251 #include "unicode/utf16.h"
00252 #include "unicode/utf32.h"
00253 
00254 /* Define types and macros according to the selected UTF size. -------------- */
00255 
/*
 * Choose the code-unit type UChar and the generic UTF_xxx macros from UTF_SIZE.
 * Only UTF_SIZE==16 compiles: the 8 and 32 branches start with #error, and so
 * does the fallback branch for any other value.
 */
00262 #if UTF_SIZE==8
00263 
00264 #   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
00265 
00266 /*
00267  * ANSI C header:
00268  * limits.h defines CHAR_MAX
00269  */
00270 #   include <limits.h>
00271 
00272     /* Define UChar to be compatible with char if possible. */
/* CHAR_MAX>=255 means plain char can hold every value 0..255. */
00273 #   if CHAR_MAX>=255
00274         typedef char UChar;
00275 #   else
00276         typedef uint8_t UChar;
00277 #   endif
00278 
00279 #elif UTF_SIZE==16
00280 
00281     /* Define UChar to be compatible with wchar_t if possible. */
00282 #   if U_SIZEOF_WCHAR_T==2
00283         typedef wchar_t UChar;
00284 #   else
00285         typedef uint16_t UChar;
00286 #   endif
00287 
/*
 * Each generic UTF_xxx macro below forwards its arguments, unchanged and in
 * the same order, to the UTF16_xxx macro of the same name. The UTF16_xxx
 * macros are not defined in this file; presumably they come from
 * unicode/utf16.h, which is included above.
 */
/* Classification of a single code unit. */
00289 #   define UTF_IS_SINGLE(uchar)                         UTF16_IS_SINGLE(uchar)
00290 
00291 #   define UTF_IS_LEAD(uchar)                           UTF16_IS_LEAD(uchar)
00292 
00293 #   define UTF_IS_TRAIL(uchar)                          UTF16_IS_TRAIL(uchar)
00294 
/* Length-related macros. */
00296 #   define UTF_NEED_MULTIPLE_UCHAR(c)                   UTF16_NEED_MULTIPLE_UCHAR(c)
00297 
00298 #   define UTF_CHAR_LENGTH(c)                           UTF16_CHAR_LENGTH(c)
00299 
00300 #   define UTF_MAX_CHAR_LENGTH                          UTF16_MAX_CHAR_LENGTH
00301 
00302 #   define UTF_ARRAY_SIZE(size)                         UTF16_ARRAY_SIZE(size)
00303 
/*
 * From here on the macros come in _UNSAFE/_SAFE pairs. The _SAFE form takes
 * additional start and/or length arguments, and some take a strict argument.
 */
00305 #   define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00306 
00307 #   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00308 
00310 #   define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00311 
00312 #   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00313 
00315 #   define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00316 
00317 #   define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00318 
00320 #   define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00321 
00322 #   define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00323 
00325 #   define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00326 
00327 #   define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00328 
00330 #   define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00331 
00332 #   define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00333 
00335 #   define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00336 
00337 #   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00338 
00340 #   define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00341 
00342 #   define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00343 
00345 #   define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00346 
00347 #   define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00348 
00350 #   define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00351 
00352 #   define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00353 
00354 #elif UTF_SIZE==32
00355 
00356 #   error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
00357 
00358     typedef UChar32 UChar;
00359 
00360 #else
00361 #   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
00362 #endif
00363 
00364 /* Define the default macros for handling UTF characters. ------------------- */
00365 
/*
 * Default, mode-independent macro names. All three branches define the same
 * ten macros with the same parameter lists, so callers are written once:
 *   UTF_SAFE    -> the _SAFE variants, passing FALSE as the strict argument
 *   UTF_STRICT  -> the _SAFE variants, passing TRUE as the strict argument
 *   otherwise   -> the _UNSAFE variants, which ignore start and length
 * UTF_SAFE is tested first, so it wins if several mode macros are defined.
 * FALSE and TRUE are not defined in this file; presumably they come from
 * unicode/umachine.h.
 */
00473 #ifdef UTF_SAFE
00474 
00475 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
00476 
00477 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
00478 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00479 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00480 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00481 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00482 
00483 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
00484 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00485 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00486 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00487 
00488 #elif defined(UTF_STRICT)
00489 
/*
 * Only UTF_GET_CHAR, UTF_NEXT_CHAR and UTF_PREV_CHAR differ from the UTF_SAFE
 * branch (TRUE instead of FALSE); the other seven definitions are identical
 * because their _SAFE variants take no strict argument.
 */
00490 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
00491 
00492 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
00493 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00494 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00495 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00496 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00497 
00498 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
00499 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00500 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00501 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00502 
00503 #else /* UTF_UNSAFE */
00504 
/*
 * The start and length parameters are accepted for source compatibility but
 * are not passed on, so the argument expressions are never expanded or evaluated.
 */
00505 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
00506 
00507 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_UNSAFE(s, i, c)
00508 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_UNSAFE(s, i, c)
00509 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_UNSAFE(s, i)
00510 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_UNSAFE(s, i, n)
00511 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_UNSAFE(s, i)
00512 
00513 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_UNSAFE(s, i, c)
00514 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_UNSAFE(s, i)
00515 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_UNSAFE(s, i, n)
00516 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
00517 
00518 #endif
00519 
00520 #endif /* __UTF_H__ */

Generated on Thu Aug 15 14:13:34 2002 for ICU 2.2 by doxygen 1.2.11.1 written by Dimitri van Heesch, © 1997-2001