utf_old.h

Go to the documentation of this file.
/*
*******************************************************************************
*
*   Copyright (C) 2002-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf_old.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002sep21
*   created by: Markus W. Scherer
*/

/**
 * \file
 * Deprecated macros for handling UTF-8, UTF-16 and UTF-32 text.
 *
 * Everything in this header is compiled out when U_HIDE_DEPRECATED_API is
 * defined. Several macros are plain aliases for U8_ / U16_ macros, which are
 * not defined in this header; the rest carry their own implementation.
 *
 * General notes that apply to all macros below:
 * - Statement macros expand to a brace-enclosed block { ... }.
 * - Most macros mention their arguments more than once, so arguments with
 *   side effects (i++, function calls) must not be passed.
 * - "UNSAFE" variants perform no bounds or well-formedness checks.
 *   "SAFE" variants take start/length limits and, where a strict flag
 *   exists, replace rejected input with an error value.
 */

#ifndef U_HIDE_DEPRECATED_API

/* utf.h must be included first. */
#ifndef __UTF_H__
#   include "unicode/utf.h"
#endif

#ifndef __UTF_OLD_H__
#define __UTF_OLD_H__

/* Formerly utf.h, part 1 --------------------------------------------------- */

#ifdef U_USE_UTF_DEPRECATES
/** String offset type, a plain int32_t. Only declared on request (U_USE_UTF_DEPRECATES). */
typedef int32_t UTextOffset;
#endif

/** Selects UTF-16 as the default form: the generic UTF_ macros further down all map to UTF-16 macros. */
#define UTF_SIZE 16

/**
 * Marker for the default flavor of the generic macros: "safe".
 * The "unsafe" and "strict" markers are explicitly undefined.
 */
#define UTF_SAFE
#undef UTF_UNSAFE
#undef UTF_STRICT

/** Error value stored by the safe UTF-8 macros in this file for an invalid byte. */
#define UTF8_ERROR_VALUE_1 0x15

/** Second UTF-8 error value; recognized by UTF_IS_ERROR() and UTF_IS_VALID(). */
#define UTF8_ERROR_VALUE_2 0x9f

/** Error value stored by the safe UTF-16 and UTF-32 macros in this file. */
#define UTF_ERROR_VALUE 0xffff

/**
 * Is c an error value?
 * True when the low 16 bits of c are 0xfffe or 0xffff, or when c equals one
 * of the two UTF-8 error values.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/**
 * Is c a valid result? True when c passes UTF_IS_UNICODE_CHAR() and is
 * neither of the two UTF-8 error values.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)

/** Is uchar a surrogate code unit/point (0xd800..0xdfff)? */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/**
 * Is c a noncharacter: 0xfdd0..0xfdef, or a value up to 0x10ffff whose low
 * 16 bits are 0xfffe or 0xffff?
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * Is c a code point that is not a surrogate, not above 0x10ffff and not a
 * noncharacter (see UTF_IS_UNICODE_NONCHAR())?
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))

/* Formerly utf8.h ---------------------------------------------------------- */

/**
 * Looks up the number of trail bytes for a lead byte in utf8_countTrailBytes[]
 * (declared outside this header).
 * The whole argument is converted to uint8_t before indexing, so compound
 * expressions and (possibly negative) plain-char values index the table
 * within 0..255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])

/**
 * Modifies leadByte in place: keeps only its low (6-countTrailBytes) bits,
 * i.e. the payload bits of a lead byte with that many trail bytes.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/** Is this byte a single-byte character (bit 7 clear)? */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/** Is this byte in the lead-byte range 0xc0..0xfd? */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/** Is this byte a trail byte (10xxxxxx)? */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/** Does code point c need more than one byte in UTF-8 (c>0x7f)? */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)

/**
 * Number of bytes used to write code point c.
 *
 * Active variant: 1 for c<=0x7f, 2 for c<=0x7ff, 4 for 0x10000..0x10ffff and
 * 3 for everything else (including values above 0x10ffff).
 * The disabled #else variant additionally distinguishes 5- and 6-byte lengths.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif

/** Maximum number of bytes per code point. */
#define UTF8_MAX_CHAR_LENGTH 4

/** Array size estimate: 5*size/2 (integer arithmetic). */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)

/**
 * Reads the code point that contains the byte at offset i into c.
 * Works on a private copy of i, so i itself is not modified. No checks.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t __I=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, __I); \
    UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
}

/**
 * Like UTF8_GET_CHAR_UNSAFE() but built from the _SAFE macros, using the
 * start/length limits and the strict flag. i itself is not modified.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t __I=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, __I); \
    UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
}

/**
 * Reads one code point starting at s[i] into c and advances i past it.
 * Bytes 0xc0..0xf4 are treated as lead bytes; their trail bytes are consumed
 * without any validation. Any other byte is returned as is.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}

/**
 * Writes code point c as 1..4 bytes starting at s[i] and advances i.
 * No range check on c and no bounds check on s.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}

/** Advances i by one code point: 1 plus the trail byte count looked up for s[i]. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/** Applies UTF8_FWD_1_UNSAFE() n times (nothing happens for n<=0). */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Moves i backwards while s[i] is a trail byte. No lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}

/**
 * Reads one code point starting at s[i] into c and advances i.
 * Bytes below 0x80 are returned directly. Lead bytes are handed to
 * utf8_nextCharSafeBody() (declared outside this header). Any other byte
 * yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/**
 * Writes code point c at s[i] and updates i.
 * c<=0x7f is stored directly; everything else is delegated to
 * utf8_appendCharSafeBody() (declared outside this header), whose return
 * value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}

/** Alias for U8_FWD_1(). */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/** Alias for U8_FWD_N(). */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/** Alias for U8_SET_CP_START(). */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)

/**
 * Decrements i to the start of the code point that ends just before the
 * original i and stores that code point in c.
 * Trail bytes are accumulated backwards until a byte >=0xc0 is found; there
 * is no validation and no lower bound check.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}

/** Decrements i at least once and then further while s[i] is a trail byte. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/** Applies UTF8_BACK_1_UNSAFE() n times (nothing happens for n<=0). */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Steps back one code point and forward one code point again. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}

/**
 * Decrements i and reads the code point ending there into c.
 * Bytes below 0x80 are returned directly. Trail bytes (0x80..0xbf) are handed
 * to utf8_prevCharSafeBody() (declared outside this header). A byte above
 * 0xbf yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/** Alias for U8_BACK_1(). */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/** Alias for U8_BACK_N(). */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/** Alias for U8_SET_CP_LIMIT(). */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf16.h --------------------------------------------------------- */

/** Is uchar a first/lead surrogate (0xd800..0xdbff)? */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/** Is uchar a second/trail surrogate (0xdc00..0xdfff)? */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/** Given that c is already known to be a surrogate: is it a first one (bit 10 clear)? */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/** Constant subtracted by UTF16_GET_PAIR_VALUE() to turn a surrogate pair into a code point. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/** Combines a first and a second surrogate into a supplementary code point. No checks. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/** First (lead) surrogate for a supplementary code point, as a UChar. */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/** Second (trail) surrogate for a supplementary code point, as a UChar. */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias for UTF_FIRST_SURROGATE(). */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias for UTF_SECOND_SURROGATE(). */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/** Is uchar a complete code point on its own, i.e. not a surrogate? */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias for UTF_IS_FIRST_SURROGATE(). */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias for UTF_IS_SECOND_SURROGATE(). */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/** Does code point c need more than one 16-bit unit (c>0xffff)? */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/** Number of 16-bit units for code point c: 1 up to 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** Maximum number of 16-bit units per code point. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Array size estimate: size, unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)

/**
 * Reads the code point that contains the unit at offset i into c; i is not
 * modified. A first surrogate is combined with s[i+1], a second surrogate
 * with s[i-1], without checking the neighbor or any bounds.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

/**
 * Reads the code point that contains the unit at offset i into c; i is not
 * modified. A surrogate is only combined with a matching neighbor inside
 * [start, length). With strict set, unmatched surrogates and other values
 * failing UTF_IS_UNICODE_CHAR() yield UTF_ERROR_VALUE; without strict an
 * unmatched surrogate is returned as is.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Reads one code point starting at s[i] into c and advances i.
 * A first surrogate is always combined with the following unit, unchecked.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

/** Writes code point c as one or two 16-bit units at s[i] and advances i. No checks. */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

/** Advances i by one unit, or by two if the unit at the original i is a first surrogate. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/** Applies UTF16_FWD_1_UNSAFE() n times (nothing happens for n<=0). */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Decrements i once if s[i] is a second surrogate. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

/**
 * Reads one code point starting at s[i] into c and advances i.
 * A first surrogate is combined with the next unit only if that unit lies
 * before length and is a second surrogate. With strict set, unmatched
 * surrogates and other values failing UTF_IS_UNICODE_CHAR() yield
 * UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Writes code point c at s[i] and advances i.
 * A single UTF_ERROR_VALUE unit is written instead when c is above 0x10ffff
 * or when a surrogate pair would not fit before length. The single-unit
 * writes themselves are not checked against length.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

/** Alias for U16_FWD_1(). */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N(). */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START(). */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)

/**
 * Decrements i and reads the code point ending there into c.
 * A second surrogate is always combined with the preceding unit, unchecked.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/** Decrements i by one unit, or by two if the unit just before the original i is a second surrogate. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/** Applies UTF16_BACK_1_UNSAFE() n times (nothing happens for n<=0). */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Increments i once if the unit before it, s[i-1], is a first surrogate. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

/**
 * Decrements i and reads the code point ending there into c.
 * A second surrogate is combined with the preceding unit only if i is still
 * above start and that unit is a first surrogate. With strict set, unmatched
 * surrogates and other values failing UTF_IS_UNICODE_CHAR() yield
 * UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Alias for U16_BACK_1(). */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N(). */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT(). */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf32.h --------------------------------------------------------- */

/*
 * Old documentation:
 *
 * This file defines macros to deal with UTF-32 code units and code points.
 * Signatures and semantics are the same as for the similarly named macros
 * in utf16.h.
 * utf32.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 *
 * Usage: ICU coding guidelines for if() statements should be followed when
 * using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.
 */

/* internal definitions ----------------------------------------------------- */

/**
 * Acceptance test used by the safe UTF-32 macros:
 * without strict, any value up to 0x10ffff; with strict, UTF_IS_UNICODE_CHAR(c).
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

/* single-code point definitions -------------------------------------------- */

/* classes of code unit values */

/** Always 1: every UTF-32 unit is a complete code point. */
#define UTF32_IS_SINGLE(uchar) 1

/** Always 0: UTF-32 has no lead units. */
#define UTF32_IS_LEAD(uchar) 0

/** Always 0: UTF-32 has no trail units. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0: no code point needs more than one UTF-32 unit. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/** Always 1 unit per code point. */
#define UTF32_CHAR_LENGTH(c) 1

/** Maximum number of units per code point: 1. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Array size estimate: size, unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)

/** c=s[i]; i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/** c=s[i], replaced by UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(). start and length are not used. */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/** c=s[i++]. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** s[i++]=c. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** ++i. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** i+=n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** No-op: every offset is a code point start. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/** c=s[i++], replaced by UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(). length is not used. */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * s[i++]=c for c up to 0x10ffff, otherwise s[i++]=0xfffd.
 * Note that this differs from UTF16_APPEND_CHAR_SAFE(), which writes
 * UTF_ERROR_VALUE. length is not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/** ++i. length is not used, so i is not limited to it. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** i+=n, then clamped so that i does not exceed length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/** No-op: every offset is a code point start. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}

/* definitions with backward iteration -------------------------------------- */

/** c=s[--i]. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** --i. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** i-=n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** No-op: every offset is a code point limit. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/** c=s[--i], replaced by UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(). start is not used. */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** --i. start is not used, so i is not limited to it. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** i-=n, then clamped so that i does not fall below start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/**
 * No-op: every offset is a code point limit.
 * Note: takes (s, i, length), i.e. no start parameter, unlike
 * UTF8_SET_CHAR_LIMIT_SAFE() and UTF16_SET_CHAR_LIMIT_SAFE().
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}

/* Formerly utf.h, part 2 --------------------------------------------------- */

/*
 * Generic UTF_..._UNSAFE / UTF_..._SAFE names.
 * Each one is a plain alias for the UTF16_ macro of the same name.
 */

/** Alias for UTF16_ARRAY_SIZE(). */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/** Alias for UTF16_GET_CHAR_UNSAFE(). */
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_GET_CHAR_SAFE(). */
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)


/** Alias for UTF16_NEXT_CHAR_UNSAFE(). */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_NEXT_CHAR_SAFE(). */
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)


/** Alias for UTF16_APPEND_CHAR_UNSAFE(). */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_APPEND_CHAR_SAFE(). */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)


/** Alias for UTF16_FWD_1_UNSAFE(). */
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)

/** Alias for UTF16_FWD_1_SAFE(). */
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)


/** Alias for UTF16_FWD_N_UNSAFE(). */
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)

/** Alias for UTF16_FWD_N_SAFE(). */
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)


/** Alias for UTF16_SET_CHAR_START_UNSAFE(). */
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_START_SAFE(). */
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)


/** Alias for UTF16_PREV_CHAR_UNSAFE(). */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_PREV_CHAR_SAFE(). */
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)


/** Alias for UTF16_BACK_1_UNSAFE(). */
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)

/** Alias for UTF16_BACK_1_SAFE(). */
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)


/** Alias for UTF16_BACK_N_UNSAFE(). */
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)

/** Alias for UTF16_BACK_N_SAFE(). */
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)


/** Alias for UTF16_SET_CHAR_LIMIT_UNSAFE(). */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_LIMIT_SAFE(). */
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

/* Define default macros (UTF-16 "safe") ------------------------------------ */

/*
 * Unqualified UTF_ names. Most are aliases for U16_ macros, which are not
 * defined in this header; two of them map to UTF16_ macros defined above.
 */

/** Alias for U16_IS_SINGLE(). */
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)

/** Alias for U16_IS_LEAD(). */
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)

/** Alias for U16_IS_TRAIL(). */
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

/** Alias for UTF16_NEED_MULTIPLE_UCHAR(). */
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)

/** Alias for U16_LENGTH(). */
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)

/** Alias for U16_MAX_LENGTH. */
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

/** Alias for U16_GET(). */
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)

/** Alias for U16_NEXT(). */
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)

/** Alias for UTF16_APPEND_CHAR_SAFE(). */
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Alias for U16_FWD_1(). */
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N(). */
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START(). */
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

/** Alias for U16_PREV(). */
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)

/** Alias for U16_BACK_1(). */
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N(). */
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT(). */
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

#endif /* __UTF_OLD_H__ */

#endif /* U_HIDE_DEPRECATED_API */

Generated on Fri Jun 18 12:36:03 2004 for ICU by doxygen 1.3.7