Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

ucnv.h File Reference

C API: Character conversion. More...

#include "unicode/utypes.h"
#include "unicode/ucnv_err.h"
#include "unicode/uenum.h"

Go to the source code of this file.

Defines

#define UCNV_MAX_CONVERTER_NAME_LENGTH   60
 Maximum length of a converter name including the terminating NULL. More...

#define UCNV_MAX_FULL_FILE_NAME_LENGTH   (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
 Maximum length of a converter name including path and terminating NULL. More...

#define UCNV_SI   0x0F
 Shift in for EBCDIC_STATEFUL and iso2022 states. More...

#define UCNV_SO   0x0E
 Shift out for EBCDIC_STATEFUL and iso2022 states. More...

#define UCNV_OPTION_SEP_CHAR   ','
 Character that separates converter names from options and options from each other. More...

#define UCNV_OPTION_SEP_STRING   ","
 String version of UCNV_OPTION_SEP_CHAR. More...

#define UCNV_VALUE_SEP_CHAR   '='
 Character that separates a converter option from its value. More...

#define UCNV_VALUE_SEP_STRING   "="
 String version of UCNV_VALUE_SEP_CHAR. More...

#define UCNV_LOCALE_OPTION_STRING   ",locale="
 Converter option for specifying a locale. More...

#define UCNV_VERSION_OPTION_STRING   ",version="
 Converter option for specifying a version selector (0..9) for some converters. More...

#define UCNV_SWAP_LFNL_OPTION_STRING   ",swaplfnl"
 Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. More...

#define U_CNV_SAFECLONE_BUFFERSIZE   3072

Typedefs

typedef struct UConverter UConverter

Enumerations

enum  UConverterType {
  UCNV_UNSUPPORTED_CONVERTER = -1, UCNV_SBCS = 0, UCNV_DBCS = 1, UCNV_MBCS = 2,
  UCNV_LATIN_1 = 3, UCNV_UTF8 = 4, UCNV_UTF16_BigEndian = 5, UCNV_UTF16_LittleEndian = 6,
  UCNV_UTF32_BigEndian = 7, UCNV_UTF32_LittleEndian = 8, UCNV_EBCDIC_STATEFUL = 9, UCNV_ISO_2022 = 10,
  UCNV_LMBCS_1 = 11, UCNV_LMBCS_2, UCNV_LMBCS_3, UCNV_LMBCS_4,
  UCNV_LMBCS_5, UCNV_LMBCS_6, UCNV_LMBCS_8, UCNV_LMBCS_11,
  UCNV_LMBCS_16, UCNV_LMBCS_17, UCNV_LMBCS_18, UCNV_LMBCS_19,
  UCNV_LMBCS_LAST = UCNV_LMBCS_19, UCNV_HZ, UCNV_SCSU, UCNV_ISCII,
  UCNV_US_ASCII, UCNV_UTF7, UCNV_BOCU1, UCNV_UTF16,
  UCNV_UTF32, UCNV_CESU8, UCNV_IMAP_MAILBOX, UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
}
 Enum for specifying basic types of converters. More...

enum  UConverterPlatform { UCNV_UNKNOWN = -1, UCNV_IBM = 0 }
 Enum for specifying which platform a converter ID refers to. More...


Functions

typedef void (*UConverterToUCallback)(const void *context, UConverterToUnicodeArgs *args, const char *codePoints, int32_t length, UConverterCallbackReason reason, UErrorCode *)
 Function pointer for error callback in the codepage to unicode direction. More...

int ucnv_compareNames (const char *name1, const char *name2)
 Do a fuzzy compare of two converter/alias names. More...

UConverter * ucnv_open (const char *converterName, UErrorCode *err)
 Creates a UConverter object with the names specified as a C string. More...

UConverter * ucnv_openU (const UChar *name, UErrorCode *err)
 Creates a Unicode converter with the names specified as unicode string. More...

UConverter * ucnv_openCCSID (int32_t codepage, UConverterPlatform platform, UErrorCode *err)
 Creates a UConverter object from a CCSID number and platform pair. More...

UConverter * ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode *err)
 Creates a UConverter object from a package name and a converter name. More...

UConverter * ucnv_safeClone (const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
 Thread safe cloning operation. More...

void ucnv_close (UConverter *converter)
 Deletes the unicode converter and releases resources associated with just this instance. More...

void ucnv_getSubstChars (const UConverter *converter, char *subChars, int8_t *len, UErrorCode *err)
 Fills in the output parameter, subChars, with the substitution characters as multiple bytes. More...

void ucnv_setSubstChars (UConverter *converter, const char *subChars, int8_t len, UErrorCode *err)
 Sets the substitution chars when converting from unicode to a codepage. More...

void ucnv_getInvalidChars (const UConverter *converter, char *errBytes, int8_t *len, UErrorCode *err)
 Fills in the output parameter, errBytes, with the error characters from the last failing conversion. More...

void ucnv_getInvalidUChars (const UConverter *converter, UChar *errUChars, int8_t *len, UErrorCode *err)
 Fills in the output parameter, errUChars, with the error characters from the last failing conversion. More...

void ucnv_reset (UConverter *converter)
 Resets the state of a converter to the default state. More...

void ucnv_resetToUnicode (UConverter *converter)
 Resets the to-Unicode part of a converter state to the default state. More...

void ucnv_resetFromUnicode (UConverter *converter)
 Resets the from-Unicode part of a converter state to the default state. More...

int8_t ucnv_getMaxCharSize (const UConverter *converter)
 Returns the maximum length of bytes used by a character. More...

int8_t ucnv_getMinCharSize (const UConverter *converter)
 Returns the minimum byte length for characters in this codepage. More...

int32_t ucnv_getDisplayName (const UConverter *converter, const char *displayLocale, UChar *displayName, int32_t displayNameCapacity, UErrorCode *err)
 Returns the display name of the converter passed in based on the Locale passed in. More...

const char * ucnv_getName (const UConverter *converter, UErrorCode *err)
 Gets the internal, canonical name of the converter (zero-terminated). More...

int32_t ucnv_getCCSID (const UConverter *converter, UErrorCode *err)
 Gets a codepage number associated with the converter. More...

UConverterPlatform ucnv_getPlatform (const UConverter *converter, UErrorCode *err)
 Gets a codepage platform associated with the converter. More...

UConverterType ucnv_getType (const UConverter *converter)
 Gets the type of the converter e.g. More...

void ucnv_getStarters (const UConverter *converter, UBool starters[256], UErrorCode *err)
 Gets the "starter" (lead) bytes for converters of type MBCS. More...

void ucnv_getToUCallBack (const UConverter *converter, UConverterToUCallback *action, const void **context)
 Gets the current callback function used by the converter when an illegal or invalid codepage sequence is found. More...

void ucnv_getFromUCallBack (const UConverter *converter, UConverterFromUCallback *action, const void **context)
 Gets the current callback function used by the converter when an illegal or invalid Unicode sequence is found. More...

void ucnv_setToUCallBack (UConverter *converter, UConverterToUCallback newAction, const void *newContext, UConverterToUCallback *oldAction, const void **oldContext, UErrorCode *err)
 Changes the callback function used by the converter when an illegal or invalid sequence is found. More...

void ucnv_setFromUCallBack (UConverter *converter, UConverterFromUCallback newAction, const void *newContext, UConverterFromUCallback *oldAction, const void **oldContext, UErrorCode *err)
 Changes the current callback function used by the converter when an illegal or invalid sequence is found. More...

void ucnv_fromUnicode (UConverter *converter, char **target, const char *targetLimit, const UChar **source, const UChar *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
 Converts an array of unicode characters to an array of codepage characters. More...

void ucnv_toUnicode (UConverter *converter, UChar **target, const UChar *targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err)
 Converts a buffer of codepage bytes into an array of unicode UChars characters. More...

int32_t ucnv_fromUChars (UConverter *cnv, char *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
 Convert the Unicode string into a codepage string using an existing UConverter. More...

int32_t ucnv_toUChars (UConverter *cnv, UChar *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
 Convert the codepage string into a Unicode string using an existing UConverter. More...

UChar32 ucnv_getNextUChar (UConverter *converter, const char **source, const char *sourceLimit, UErrorCode *err)
 Will convert a codepage buffer into unicode one character at a time. More...

int32_t ucnv_convert (const char *toConverterName, const char *fromConverterName, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode *err)
 Will convert a sequence of bytes from one codepage to another. More...

int32_t ucnv_flushCache (void)
 Frees up memory occupied by unused, cached converter shared data. More...

int32_t ucnv_countAvailable (void)
 returns the number of available converters, as per the alias file. More...

const char * ucnv_getAvailableName (int32_t n)
 Gets the name of the specified converter from a list of all converters contained in the alias file. More...

UEnumeration * ucnv_openAllNames (UErrorCode *pErrorCode)
 Returns a UEnumeration to enumerate all of the canonical converter names, as per the alias file, regardless of the ability to open each converter. More...

uint16_t ucnv_countAliases (const char *alias, UErrorCode *pErrorCode)
 Gives the number of aliases for a given converter or alias name. More...

const char * ucnv_getAlias (const char *alias, uint16_t n, UErrorCode *pErrorCode)
 Gives the name of the alias at given index of alias list. More...

void ucnv_getAliases (const char *alias, const char **aliases, UErrorCode *pErrorCode)
 Fill-up the list of alias names for the given alias. More...

UEnumeration * ucnv_openStandardNames (const char *convName, const char *standard, UErrorCode *pErrorCode)
 Return a new UEnumeration object for enumerating all the alias names for a given converter that are recognized by a standard. More...

uint16_t ucnv_countStandards (void)
 Gives the number of standards associated to converter names. More...

const char * ucnv_getStandard (uint16_t n, UErrorCode *pErrorCode)
 Gives the name of the standard at given index of standard list. More...

const char * ucnv_getStandardName (const char *name, const char *standard, UErrorCode *pErrorCode)
 Returns a standard name for a given converter name. More...

const char * ucnv_getCanonicalName (const char *alias, const char *standard, UErrorCode *pErrorCode)
 This function will return the internal canonical converter name of the tagged alias. More...

const char * ucnv_getDefaultName (void)
 returns the current default converter name. More...

void ucnv_setDefaultName (const char *name)
 sets the current default converter name. More...

void ucnv_fixFileSeparator (const UConverter *cnv, UChar *source, int32_t sourceLen)
 Fixes the backslash character mismapping. More...

UBool ucnv_isAmbiguous (const UConverter *cnv)
 Determines if the converter contains ambiguous mappings of the same character or not. More...

void ucnv_setFallback (UConverter *cnv, UBool usesFallback)
 Sets the converter to use fallback mapping or not. More...

UBool ucnv_usesFallback (const UConverter *cnv)
 Determines if the converter uses fallback mappings or not. More...

const char * ucnv_detectUnicodeSignature (const char *source, int32_t sourceLength, int32_t *signatureLength, UErrorCode *pErrorCode)
 Detects Unicode signature byte sequences at the start of the byte stream and returns the charset name of the indicated Unicode charset. More...


Detailed Description

C API: Character conversion.

Character Conversion C API

This API is used to convert codepage or character encoded data to and from UTF-16. You can open a converter with ucnv_open. With that converter, you can get its properties, set options, convert your data and close the converter.

Since many software programs recognize different converter names for different types of converters, there are other functions in this API to iterate over the converter aliases. The functions ucnv_getAvailableName, ucnv_getAlias and ucnv_getStandardName are some of the more frequently used alias functions to get this information.

More information about this API can be found in our User's Guide.

Definition in file ucnv.h.


Define Documentation

#define UCNV_LOCALE_OPTION_STRING   ",locale="
 

Converter option for specifying a locale.

For example, ucnv_open("SCSU,locale=ja", &errorCode); See convrtrs.txt.

See also:
ucnv_open
Stable:
ICU 2.0

Definition at line 207 of file ucnv.h.

#define UCNV_MAX_CONVERTER_NAME_LENGTH   60
 

Maximum length of a converter name including the terminating NULL.

Stable:
ICU 2.0

Definition at line 57 of file ucnv.h.

#define UCNV_MAX_FULL_FILE_NAME_LENGTH   (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
 

Maximum length of a converter name including path and terminating NULL.

Stable:
ICU 2.0

Definition at line 59 of file ucnv.h.

#define UCNV_OPTION_SEP_CHAR   ','
 

Character that separates converter names from options and options from each other.

See also:
ucnv_open
Stable:
ICU 2.0

Definition at line 176 of file ucnv.h.

#define UCNV_OPTION_SEP_STRING   ","
 

String version of UCNV_OPTION_SEP_CHAR.

See also:
ucnv_open
Stable:
ICU 2.0

Definition at line 183 of file ucnv.h.

#define UCNV_SI   0x0F
 

Shift in for EBCDIC_STATEFUL and iso2022 states.

Stable:
ICU 2.0

Definition at line 62 of file ucnv.h.

#define UCNV_SO   0x0E
 

Shift out for EBCDIC_STATEFUL and iso2022 states.

Stable:
ICU 2.0

Definition at line 64 of file ucnv.h.

#define UCNV_SWAP_LFNL_OPTION_STRING   ",swaplfnl"
 

Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.

Swaps Unicode mappings for EBCDIC LF and NL codes, as used on S/390 (z/OS) Unix System Services (Open Edition). For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); See convrtrs.txt.

See also:
ucnv_open
Draft:
This API has been introduced in ICU 2.4. It is still in draft state and may be modified in a future release.

Definition at line 229 of file ucnv.h.

#define UCNV_VALUE_SEP_CHAR   '='
 

Character that separates a converter option from its value.

See also:
ucnv_open
Stable:
ICU 2.0

Definition at line 190 of file ucnv.h.

#define UCNV_VALUE_SEP_STRING   "="
 

String version of UCNV_VALUE_SEP_CHAR.

See also:
ucnv_open
Stable:
ICU 2.0

Definition at line 197 of file ucnv.h.

#define UCNV_VERSION_OPTION_STRING   ",version="
 

Converter option for specifying a version selector (0..9) for some converters.

For example, ucnv_open("UTF-7,version=1", &errorCode); See convrtrs.txt.

See also:
ucnv_open
Draft:
This API has been introduced in ICU 2.4. It is still in draft state and may be modified in a future release.

Definition at line 217 of file ucnv.h.

#define U_CNV_SAFECLONE_BUFFERSIZE   3072
 

Stable:
ICU 2.0

Definition at line 430 of file ucnv.h.


Typedef Documentation

typedef struct UConverter UConverter
 

Stable:
ICU 2.0

Definition at line 48 of file ucnv.h.


Enumeration Type Documentation

enum UConverterPlatform
 

Enum for specifying which platform a converter ID refers to.

The use of platform/CCSID is not recommended. See ucnv_openCCSID().

See also:
ucnv_getPlatform , ucnv_openCCSID , ucnv_getCCSID
Stable:
ICU 2.0
Enumeration values:
UCNV_UNKNOWN 
UCNV_IBM 

Definition at line 123 of file ucnv.h.

enum UConverterType
 

Enum for specifying basic types of converters.

See also:
ucnv_getType
Stable:
ICU 2.0
Enumeration values:
UCNV_UNSUPPORTED_CONVERTER 
UCNV_SBCS 
UCNV_DBCS 
UCNV_MBCS 
UCNV_LATIN_1 
UCNV_UTF8 
UCNV_UTF16_BigEndian 
UCNV_UTF16_LittleEndian 
UCNV_UTF32_BigEndian 
UCNV_UTF32_LittleEndian 
UCNV_EBCDIC_STATEFUL 
UCNV_ISO_2022 
UCNV_LMBCS_1 
UCNV_LMBCS_2 
UCNV_LMBCS_3 
UCNV_LMBCS_4 
UCNV_LMBCS_5 
UCNV_LMBCS_6 
UCNV_LMBCS_8 
UCNV_LMBCS_11 
UCNV_LMBCS_16 
UCNV_LMBCS_17 
UCNV_LMBCS_18 
UCNV_LMBCS_19 
UCNV_LMBCS_LAST 
UCNV_HZ 
UCNV_SCSU 
UCNV_ISCII 
UCNV_US_ASCII 
UCNV_UTF7 
UCNV_BOCU1 
UCNV_UTF16 
UCNV_UTF32 
UCNV_CESU8 
UCNV_IMAP_MAILBOX 
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES 

Definition at line 71 of file ucnv.h.


Function Documentation

void ucnv_close UConverter *    converter
 

Deletes the unicode converter and releases resources associated with just this instance.

Does not free up shared converter tables.

Parameters:
converter  the converter object to be deleted
See also:
ucnv_open , ucnv_openU , ucnv_openCCSID
Stable:
ICU 2.0

int ucnv_compareNames const char *    name1,
const char *    name2
 

Do a fuzzy compare of two converter/alias names.

The comparison is case-insensitive. It also ignores the characters '-', '_', and ' ' (dash, underscore, and space). Thus the strings "UTF-8", "utf_8", and "Utf 8" are exactly equivalent.

Parameters:
name1  a converter name or alias, zero-terminated
name2  a converter name or alias, zero-terminated
Returns:
0 if the names match, or a negative value if the name1 lexically precedes name2, or a positive value if the name1 lexically follows name2.
Stable:
ICU 2.0

int32_t ucnv_convert const char *    toConverterName,
const char *    fromConverterName,
char *    target,
int32_t    targetCapacity,
const char *    source,
int32_t    sourceLength,
UErrorCode *    err
 

Will convert a sequence of bytes from one codepage to another.

This is NOT AN EFFICIENT way to transcode. Use ucnv_toUnicode() and ucnv_fromUnicode() for efficiency.

Parameters:
toConverterName  The name of the converter that will be used in conversion from unicode into the output buffer
fromConverterName  The name of the converter that will be used in conversion from the source buffer into intermediate unicode.
target  Pointer to the output buffer
targetCapacity  capacity of the target, in bytes
source  Pointer to the input buffer
sourceLength  capacity of the source, in bytes
err  error status. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still input left in the source.
Returns:
will be filled in with the number of bytes needed in target
See also:
ucnv_fromUnicode , ucnv_toUnicode , ucnv_fromUChars , ucnv_toUChars , ucnv_getNextUChar
Stable:
ICU 2.0

uint16_t ucnv_countAliases const char *    alias,
UErrorCode *    pErrorCode
 

Gives the number of aliases for a given converter or alias name.

If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING. This method only enumerates the listed entries in the alias file.

Parameters:
alias  alias name
pErrorCode  error status
Returns:
number of names on alias list for given alias
Stable:
ICU 2.0

int32_t ucnv_countAvailable void   
 

returns the number of available converters, as per the alias file.

Returns:
the number of available converters
See also:
ucnv_getAvailableName
Stable:
ICU 2.0

uint16_t ucnv_countStandards void   
 

Gives the number of standards associated to converter names.

Returns:
number of standards
Stable:
ICU 2.0

const char* ucnv_detectUnicodeSignature const char *    source,
int32_t    sourceLength,
int32_t *    signatureLength,
UErrorCode *    pErrorCode
 

Detects Unicode signature byte sequences at the start of the byte stream and returns the charset name of the indicated Unicode charset.

NULL is returned when no Unicode signature is recognized. The number of bytes in the signature is output as well.

The caller can ucnv_open() a converter using the charset name. The first code unit (UChar) from the start of the stream will be U+FEFF (the Unicode BOM/signature character) and can usually be ignored.

For most Unicode charsets it is also possible to ignore the indicated number of initial stream bytes and start converting after them. However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which this will not work. Therefore, it is best to ignore the first output UChar instead of the input signature bytes.

Usage:

      UErrorCode err = U_ZERO_ERROR;
      char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
      int32_t signatureLength = 0;
      const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
      UConverter *conv = NULL;
      UChar output[100];
      UChar *target = output, *out;
      const char *source = input;
      if(encoding!=NULL && U_SUCCESS(err)){
          // should signature be discarded ?
          conv = ucnv_open(encoding, &err);
          // do the conversion
          ucnv_toUnicode(conv,
                         &target, output + sizeof(output)/U_SIZEOF_UCHAR,
                         &source, input + sizeof(input),
                         NULL, TRUE, &err);
          out = output;
          if (discardSignature){
              ++out; // ignore initial U+FEFF
          }
          while(out != target) {
              printf("%04x ", *out++);
          }
          puts("");
      }
Parameters:
source  The source string in which the signature should be detected.
sourceLength  Length of the input string, or -1 if terminated with a NUL byte.
signatureLength  A pointer to int32_t to receive the number of bytes that make up the signature of the detected UTF. 0 if not detected. Can be a NULL pointer.
pErrorCode  A pointer to receive information about any errors that may occur during detection. Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
Returns:
The name of the encoding detected. NULL if encoding is not detected.
Stable:
ICU 2.4

void ucnv_fixFileSeparator const UConverter *    cnv,
UChar *    source,
int32_t    sourceLen
 

Fixes the backslash character mismapping.

For example, in SJIS, the backslash character in the ASCII portion is also used to represent the yen currency sign. When mapping from Unicode character 0x005C, it's unclear whether to map the character back to yen or backslash in SJIS. This function will take the input buffer and replace all the yen sign characters with backslash. This is necessary when the user tries to open a file with the input buffer on Windows. This function will test the converter to see whether such mapping is required. You can sometimes avoid using this function by using the correct version of Shift-JIS.

Parameters:
cnv  The converter representing the target codepage.
source  the input buffer to be fixed
sourceLen  the length of the input buffer
See also:
ucnv_isAmbiguous
Stable:
ICU 2.0

int32_t ucnv_flushCache void   
 

Frees up memory occupied by unused, cached converter shared data.

Returns:
the number of cached converters successfully deleted
See also:
ucnv_close
Stable:
ICU 2.0

int32_t ucnv_fromUChars UConverter *    cnv,
char *    dest,
int32_t    destCapacity,
const UChar *    src,
int32_t    srcLength,
UErrorCode *    pErrorCode
 

Convert the Unicode string into a codepage string using an existing UConverter.

The output string is NUL-terminated if possible.

This function is a more convenient but less powerful version of ucnv_fromUnicode(). It is only useful for whole strings, not for streaming conversion.

The maximum output buffer capacity required (barring output from callbacks) will be srcLength*ucnv_getMaxCharSize(cnv).

Parameters:
cnv  the converter object to be used (ucnv_resetFromUnicode() will be called)
src  the input Unicode string
srcLength  the input string length, or -1 if NUL-terminated
dest  destination string buffer, can be NULL if destCapacity==0
destCapacity  the number of chars available at dest
pErrorCode  normal ICU error code; common error codes that may be set by this function include U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
Returns:
the length of the output string, not counting the terminating NUL; if the length is greater than destCapacity, then the string will not fit and a buffer of the indicated length would need to be passed in
See also:
ucnv_fromUnicode , ucnv_convert
Stable:
ICU 2.0

void ucnv_fromUnicode UConverter *    converter,
char **    target,
const char *    targetLimit,
const UChar **    source,
const UChar *    sourceLimit,
int32_t *    offsets,
UBool    flush,
UErrorCode *    err
 

Converts an array of unicode characters to an array of codepage characters.

This function is optimized for converting a continuous stream of data in buffer-sized chunks, where the entire source and target do not fit in available buffers.

The source pointer is an in/out parameter. It starts out pointing where the conversion is to begin, and ends up pointing after the last UChar consumed.

Target similarly starts out pointing at the first available byte in the output buffer, and ends up pointing after the last byte written to the output.

The converter always attempts to consume the entire source buffer, unless (1.) the target buffer is full, or (2.) a failing error is returned from the current callback function. When a successful error status has been returned, it means that all of the source buffer has been consumed. At that point, the caller should reset the source and sourceLimit pointers to point to the next chunk.

This is a stateful conversion. Additionally, even when all source data has been consumed, some data may be in the converter's internal state. Call this function repeatedly, updating the target pointers with the next empty chunk of target in case of a U_BUFFER_OVERFLOW_ERROR, and updating the source pointers with the next chunk of source when a successful error status is returned, until there are no more chunks of source data.

Parameters:
converter  the Unicode converter
target  I/O parameter. Input : Points to the beginning of the buffer to copy codepage characters to. Output : points to after the last codepage character copied to target.
targetLimit  the pointer just after last of the target buffer
source  I/O parameter, pointer to pointer to the source Unicode character buffer.
sourceLimit  the pointer just after the last of the source buffer
offsets  if NULL is passed, nothing will happen to it, otherwise it needs to have the same number of allocated cells as target. Will fill in offsets from target to source pointer, e.g. if offsets[3] is equal to 6, it means that target[3] was a result of transcoding source[6]. For output data carried across calls, and other data without a specific source character (such as from escape sequences or callbacks), -1 will be placed for offsets.
flush  set to TRUE if the current source buffer is the last available chunk of the source, FALSE otherwise. Note that if a failing status is returned, this function may have to be called multiple times with flush set to TRUE until the source buffer is consumed.
err  the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the converter is NULL. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still data to be written to the target.
See also:
ucnv_fromUChars , ucnv_convert , ucnv_getMinCharSize , ucnv_setToUCallBack
Stable:
ICU 2.0

const char* ucnv_getAlias const char *    alias,
uint16_t    n,
UErrorCode *    pErrorCode
 

Gives the name of the alias at given index of alias list.

This method only enumerates the listed entries in the alias file. If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

Parameters:
alias  alias name
n  index in alias list
pErrorCode  result of operation
Returns:
returns the name of the alias at given index
See also:
ucnv_countAliases
Stable:
ICU 2.0

void ucnv_getAliases const char *    alias,
const char **    aliases,
UErrorCode *    pErrorCode
 

Fill-up the list of alias names for the given alias.

This method only enumerates the listed entries in the alias file. If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

Parameters:
alias  alias name
aliases  fill-in list, aliases is a pointer to an array of ucnv_countAliases() string-pointers (const char *) that will be filled in. The strings themselves are owned by the library.
pErrorCode  result of operation
Stable:
ICU 2.0

const char* ucnv_getAvailableName int32_t    n
 

Gets the name of the specified converter from a list of all converters contained in the alias file.

Parameters:
n  the index to a converter available on the system (in the range [0..ucnv_countAvailable()])
Returns:
a pointer to a string (library owned), or NULL if the index is out of bounds.
See also:
ucnv_countAvailable
Stable:
ICU 2.0

int32_t ucnv_getCCSID const UConverter *    converter,
UErrorCode *    err
 

Gets a codepage number associated with the converter.

This is not guaranteed to be the one used to create the converter. Some converters do not represent platform registered codepages and return zero for the codepage number. The error code fill-in parameter indicates if the codepage number is available. Does not check if the converter is NULL or if converter's data table is NULL.

Important: The use of CCSIDs is not recommended because it is limited to only two platforms in principle and only one (UCNV_IBM) in the current ICU converter API. Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. For more details see ucnv_openCCSID().

Parameters:
converter  the Unicode converter
err  the error status code.
Returns:
If any error occurs, -1 will be returned; otherwise, the codepage number will be returned
See also:
ucnv_openCCSID , ucnv_getPlatform
Stable:
ICU 2.0

const char* ucnv_getCanonicalName const char *    alias,
const char *    standard,
UErrorCode *    pErrorCode
 

This function will return the internal canonical converter name of the tagged alias.

This is the opposite of ucnv_openStandardNames, which returns the tagged alias given the canonical name.

Returns:
returns the canonical converter name; if a standard or alias name cannot be determined, then NULL is returned. The returned string is owned by the library.
See also:
ucnv_getStandardName
Draft:
This API has been introduced in ICU 2.4. It is still in draft state and may be modified in a future release.

const char* ucnv_getDefaultName void   
 

returns the current default converter name.

Returns:
returns the current default converter name; if a default converter name cannot be determined, then NULL is returned. Storage owned by the library
See also:
ucnv_setDefaultName
Stable:
ICU 2.0

int32_t ucnv_getDisplayName const UConverter *    converter,
const char *    displayLocale,
UChar *    displayName,
int32_t    displayNameCapacity,
UErrorCode *    err
 

Returns the display name of the converter passed in based on the Locale passed in.

If the locale contains no display name, the internal ASCII name will be filled in.

Parameters:
converter  the Unicode converter.
displayLocale  the specific locale we want to localize for
displayName  user provided buffer to be filled in
displayNameCapacity  size of displayName Buffer
err  error status code
Returns:
displayNameLength number of UChar needed in displayName
See also:
ucnv_getName
Stable:
ICU 2.0

void ucnv_getFromUCallBack const UConverter *    converter,
UConverterFromUCallback *    action,
const void **    context
 

Gets the current callback function used by the converter when an illegal or invalid Unicode sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter  the unicode converter
action  fillin: returns the callback function pointer
context  fillin: returns the callback's private void* context
See also:
ucnv_setFromUCallBack
Stable:
ICU 2.0

void ucnv_getInvalidChars const UConverter *    converter,
char *    errBytes,
int8_t *    len,
UErrorCode *    err
 

Fills in the output parameter, errBytes, with the error characters from the last failing conversion.

Parameters:
converter  the Unicode converter
errBytes  the codepage bytes which were in error
len  on input the capacity of errBytes, on output the number of bytes which were copied to it
err  the error status code. If the substitution character array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
Stable:
ICU 2.0

void ucnv_getInvalidUChars const UConverter   converter,
UChar *    errUChars,
int8_t   len,
UErrorCode   err
 

Fills in the output parameter, errUChars, with the error characters from the last failing conversion.

Parameters:
converter  the Unicode converter
errUChars  the UChars which were in error
len  on input the capacity of errUChars, on output the number of UChars which were copied to it
err  the error status code. If the errUChars array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
Stable:
ICU 2.0

int8_t ucnv_getMaxCharSize const UConverter   converter
 

Returns the maximum length of bytes used by a character.

This varies between 1 and 4

Parameters:
converter  the Unicode converter
Returns:
the maximum number of bytes allowed by this particular converter
See also:
ucnv_getMinCharSize
Stable:
ICU 2.0

int8_t ucnv_getMinCharSize const UConverter   converter
 

Returns the minimum byte length for characters in this codepage.

This is either 1 or 2 for all supported codepages.

Parameters:
converter  the Unicode converter
Returns:
the minimum number of bytes allowed by this particular converter
See also:
ucnv_getMaxCharSize
Stable:
ICU 2.0

const char* ucnv_getName const UConverter   converter,
UErrorCode   err
 

Gets the internal, canonical name of the converter (zero-terminated).

The lifetime of the returned string will be that of the converter passed to this function.

Parameters:
converter  the Unicode converter
err  UErrorCode status
Returns:
the internal name of the converter
See also:
ucnv_getDisplayName
Stable:
ICU 2.0

UChar32 ucnv_getNextUChar UConverter   converter,
const char **    source,
const char *    sourceLimit,
UErrorCode   err
 

Will convert a codepage buffer into unicode one character at a time.

This function was written to be efficient when transcoding small amounts of data at a time. In that case it will be more efficient than ucnv_toUnicode(). When converting large buffers use ucnv_toUnicode().

Handling of surrogate pairs and supplementary-plane code points:
There are two different kinds of codepages that provide mappings for surrogate characters:

  • Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. Each valid sequence will result in exactly one returned code point. If a sequence results in a single surrogate, then that will be returned by itself, even if a neighboring sequence encodes the matching surrogate.
  • Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points including surrogates. Code points in supplementary planes are represented with two sequences, each encoding a surrogate. For these codepages, matching pairs of surrogates will be combined into single code points for returning from this function. (Note that SCSU is actually a mix of these codepage types.)
Parameters:
converter  an open UConverter
source  the address of a pointer to the codepage buffer, will be updated to point after the bytes consumed in the conversion call.
sourceLimit  points to the end of the input buffer
err  fills in error status (see ucnv_toUnicode). U_INDEX_OUTOFBOUNDS_ERROR will be set if the input is empty or does not convert to any output (e.g.: pure state-change codes SI/SO, escape sequences for ISO 2022, or if the callback did not output anything, ...). This function will not set a U_BUFFER_OVERFLOW_ERROR because the "buffer" is the return code. However, there might be subsequent output stored in the converter object that will be returned in following calls to this function.
Returns:
a UChar32 resulting from the partial conversion of source
See also:
ucnv_toUnicode , ucnv_toUChars , ucnv_convert
Stable:
ICU 2.0

UConverterPlatform ucnv_getPlatform const UConverter   converter,
UErrorCode   err
 

Gets a codepage platform associated with the converter.

Currently, only UCNV_IBM will be returned. Does not test if the converter is NULL or if converter's data table is NULL.

Parameters:
converter  the Unicode converter
err  the error status code.
Returns:
The codepage platform
Stable:
ICU 2.0

const char* ucnv_getStandard uint16_t    n,
UErrorCode   pErrorCode
 

Gives the name of the standard at given index of standard list.

Parameters:
n  index in standard list
pErrorCode  result of operation
Returns:
returns the name of the standard at given index. Owned by the library.
Stable:
ICU 2.0

const char* ucnv_getStandardName const char *    name,
const char *    standard,
UErrorCode   pErrorCode
 

Returns a standard name for a given converter name.

Parameters:
name  original converter name
standard  name of the standard governing the names; MIME and IANA are such standards
Returns:
returns the standard converter name; if a standard converter name cannot be determined, then NULL is returned. Owned by the library.
Stable:
ICU 2.0

void ucnv_getStarters const UConverter   converter,
UBool    starters[256],
UErrorCode   err
 

Gets the "starter" (lead) bytes for converters of type MBCS.

Will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in is not MBCS. Fills in an array of type UBool, with the value of the byte as offset to the array. For example, if (starters[0x20] == TRUE) at return, it means that the byte 0x20 is a starter byte in this converter. Context pointers are always owned by the caller.

Parameters:
converter  a valid, opened converter of type MBCS
starters  an array of size 256 to be filled in
err  error status, U_ILLEGAL_ARGUMENT_ERROR if the converter is not a type which can return starters.
See also:
ucnv_getType
Stable:
ICU 2.0

void ucnv_getSubstChars const UConverter   converter,
char *    subChars,
int8_t   len,
UErrorCode   err
 

Fills in the output parameter, subChars, with the substitution characters as multiple bytes.

Parameters:
converter  the Unicode converter
subChars  the substitution characters
len  on input the capacity of subChars, on output the number of bytes copied to it
err  the outgoing error status code. If the substitution character array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
See also:
ucnv_setSubstChars
Stable:
ICU 2.0

void ucnv_getToUCallBack const UConverter   converter,
UConverterToUCallback *    action,
const void **    context
 

Gets the current callback function used by the converter when an illegal or invalid codepage sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter  the unicode converter
action  fillin: returns the callback function pointer
context  fillin: returns the callback's private void* context
See also:
ucnv_setToUCallBack
Stable:
ICU 2.0

UConverterType ucnv_getType const UConverter   converter
 

Gets the type of the converter e.g.

SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1

Parameters:
converter  a valid, opened converter
Returns:
the type of the converter
Stable:
ICU 2.0

UBool ucnv_isAmbiguous const UConverter   cnv
 

Determines if the converter contains ambiguous mappings of the same character or not.

Parameters:
cnv  the converter to be tested
Returns:
TRUE if the converter contains ambiguous mapping of the same character, FALSE otherwise.
Stable:
ICU 2.0

UConverter* ucnv_open const char *    converterName,
UErrorCode   err
 

Creates a UConverter object with the names specified as a C string.

The actual name will be resolved with the alias file using a case-insensitive string comparison that ignores the delimiters '-', '_', and ' ' (dash, underscore, and space). E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. If NULL is passed for the converter name, it will create one with the getDefaultName return value.

A converter name for ICU 1.5 and above may contain options like a locale specification to control the specific behavior of the newly instantiated converter. The meaning of the options depends on the particular converter. If an option is not defined for or recognized by a given converter, then it is ignored.

Options are appended to the converter name string, with a UCNV_OPTION_SEP_CHAR between the name and the first option and also between adjacent options.

If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

Parameters:
converterName  : name of the uconv table, may have options appended
err  outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR
Returns:
the created Unicode converter object, or NULL if an error occurred
See also:
ucnv_openU , ucnv_openCCSID , ucnv_close
Stable:
ICU 2.0

UEnumeration* ucnv_openAllNames UErrorCode   pErrorCode
 

Returns a UEnumeration to enumerate all of the canonical converter names, as per the alias file, regardless of the ability to open each converter.

Returns:
A UEnumeration object for getting all the recognized canonical converter names.
See also:
ucnv_getAvailableName , uenum_close , uenum_next
Draft:
This API has been introduced in ICU 2.4. It is still in draft state and may be modified in a future release.

UConverter* ucnv_openCCSID int32_t    codepage,
UConverterPlatform    platform,
UErrorCode   err
 

Creates a UConverter object from a CCSID number and platform pair.

Note that the usefulness of this function is limited to platforms with numeric encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for encodings.

In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and for some Unicode conversion tables there are multiple CCSIDs. Some "alternate" Unicode conversion tables are provided by the IBM CDRA conversion table registry. The most prominent example of a systematic modification of conversion tables that is not provided in the form of conversion table files in the repository is that S/390 Unix System Services swaps the codes for Line Feed and New Line in all EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.

Only IBM default conversion tables are accessible with ucnv_openCCSID(). ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated with that CCSID.

Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.

In summary, the use of CCSIDs and the associated API functions is not recommended.

In order to open a converter with the default IBM CDRA Unicode conversion table, you can use this function or use the prefix "ibm-":

     char name[20];
     sprintf(name, "ibm-%hu", ccsid);
     cnv=ucnv_open(name, &errorCode);

In order to open a converter with the IBM S/390 Unix System Services variant of a Unicode/EBCDIC conversion table, you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:

     char name[20];
     sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
     cnv=ucnv_open(name, &errorCode);

In order to open a converter from a Microsoft codepage number, use the prefix "cp":

     char name[20];
     sprintf(name, "cp%hu", codepageID);
     cnv=ucnv_open(name, &errorCode);

If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

Parameters:
codepage  codepage number to create
platform  the platform in which the codepage number exists
err  error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR
Returns:
the created Unicode converter object, or NULL if an error occurred.
See also:
ucnv_open , ucnv_openU , ucnv_close , ucnv_getCCSID , ucnv_getPlatform , UConverterPlatform
Stable:
ICU 2.0

UConverter* ucnv_openPackage const char *    packageName,
const char *    converterName,
UErrorCode   err
 

Creates a UConverter object specified from a packageName and a converterName.

The packageName and converterName must point to an ICU udata object, as defined by udata_open( packageName, "cnv", converterName, err) or equivalent. Typically, packageName will refer to a (.dat) file, or to a package registered with udata_setAppData().

The name will NOT be looked up in the alias mechanism, nor will the converter be stored in the converter cache or the alias table. The only way to open further converters is to call this function multiple times, or use the ucnv_safeClone() function to clone a 'master' converter.

Example Use: cnv = ucnv_openPackage("myapp", "myconverter", &err);

Parameters:
packageName  name of the package (equivalent to 'path' in udata_open() call)
converterName  name of the data item to be used, without suffix.
err  outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR
Returns:
the created Unicode converter object, or NULL if an error occurred
See also:
udata_open , ucnv_open , ucnv_safeClone , ucnv_close
Draft:
This API has been introduced in ICU 2.2. It is still in draft state and may be modified in a future release.

UEnumeration* ucnv_openStandardNames const char *    convName,
const char *    standard,
UErrorCode   pErrorCode
 

Return a new UEnumeration object for enumerating all the alias names for a given converter that are recognized by a standard.

This method only enumerates the listed entries in the alias file. The convrtrs.txt file can be modified to change the results of this function. The first result in this list is the same result given by ucnv_getStandardName, which is the default alias for the specified standard name. The returned object must be closed with uenum_close when you are done with the object.

Parameters:
convName  original converter name
standard  name of the standard governing the names; MIME and IANA are such standards
pErrorCode  The error code
Returns:
A UEnumeration object for getting all aliases that are recognized by a standard. If any of the parameters are invalid, NULL is returned.
See also:
ucnv_getStandardName , uenum_close , uenum_next
Draft:
This API has been introduced in ICU 2.2. It is still in draft state and may be modified in a future release.

UConverter* ucnv_openU const UChar *    name,
UErrorCode   err
 

Creates a Unicode converter with the names specified as unicode string.

The name should be limited to the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file using a case-insensitive string comparison that ignores the delimiters '-', '_', and ' ' (dash, underscore, and space). E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. If NULL is passed for the converter name, it will create one with the ucnv_getDefaultName() return value. If the alias is ambiguous, then the preferred converter is used and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

Parameters:
name  : name of the uconv table in a zero terminated Unicode string
err  outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR
Returns:
the created Unicode converter object, or NULL if an error occurred
See also:
ucnv_open , ucnv_openCCSID , ucnv_close , ucnv_getDefaultName
Stable:
ICU 2.0

void ucnv_reset UConverter   converter
 

Resets the state of a converter to the default state.

This is used in the case of an error, to restart a conversion from a known default state. It will also empty the internal output buffers.

Parameters:
converter  the Unicode converter
Stable:
ICU 2.0

void ucnv_resetFromUnicode UConverter   converter
 

Resets the from-Unicode part of a converter state to the default state.

This is used in the case of an error to restart a conversion from Unicode to a known default state. It will also empty the internal output buffers used for the conversion from Unicode codepoints.

Parameters:
converter  the Unicode converter
Stable:
ICU 2.0

void ucnv_resetToUnicode UConverter   converter
 

Resets the to-Unicode part of a converter state to the default state.

This is used in the case of an error to restart a conversion to Unicode to a known default state. It will also empty the internal output buffers used for the conversion to Unicode codepoints.

Parameters:
converter  the Unicode converter
Stable:
ICU 2.0

UConverter* ucnv_safeClone const UConverter   cnv,
void *    stackBuffer,
int32_t   pBufferSize,
UErrorCode   status
 

Thread safe cloning operation.

Parameters:
cnv  converter to be cloned
stackBuffer  user allocated space for the new clone. If NULL new memory will be allocated. If buffer is not large enough, new memory will be allocated. Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
pBufferSize  pointer to size of allocated space. If *pBufferSize == 0, a sufficient size for use in cloning will be returned ('pre-flighting'). If *pBufferSize is not enough for a stack-based safe clone, new memory will be allocated.
status  to indicate whether the operation went on smoothly or there were errors. An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
Returns:
pointer to the new clone
Stable:
ICU 2.0

void ucnv_setDefaultName const char *    name
 

sets the current default converter name.

Caller must own the storage for 'name' and preserve it indefinitely.

Parameters:
name  the converter name to be the default (must exist).
See also:
ucnv_getDefaultName
System:
SYSTEM API
Stable:
ICU 2.0

void ucnv_setFallback UConverter   cnv,
UBool    usesFallback
 

Sets the converter to use fallback mapping or not.

Parameters:
cnv  The converter to set the fallback mapping usage on.
usesFallback  TRUE if the user wants the converter to take advantage of the fallback mapping, FALSE otherwise.
Stable:
ICU 2.0

void ucnv_setFromUCallBack UConverter   converter,
UConverterFromUCallback    newAction,
const void *    newContext,
UConverterFromUCallback *    oldAction,
const void **    oldContext,
UErrorCode   err
 

Changes the current callback function used by the converter when an illegal or invalid sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter  the unicode converter
newAction  the new callback function
newContext  the new fromUnicode callback context pointer
oldAction  fillin: returns the old callback function pointer
oldContext  fillin: returns the old callback's private void* context
err  The error code status
See also:
ucnv_getFromUCallBack
Stable:
ICU 2.0

void ucnv_setSubstChars UConverter   converter,
const char *    subChars,
int8_t    len,
UErrorCode   err
 

Sets the substitution chars when converting from unicode to a codepage.

The substitution is specified as a string of 1-4 bytes, and may contain a NUL byte.

Parameters:
converter  the Unicode converter
subChars  the substitution character byte sequence we want set
len  the number of bytes in subChars
err  the error status code. U_INDEX_OUTOFBOUNDS_ERROR if len is bigger than the maximum number of bytes allowed in subchars
See also:
ucnv_getSubstChars
Stable:
ICU 2.0

void ucnv_setToUCallBack UConverter   converter,
UConverterToUCallback    newAction,
const void *    newContext,
UConverterToUCallback *    oldAction,
const void **    oldContext,
UErrorCode   err
 

Changes the callback function used by the converter when an illegal or invalid sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter  the unicode converter
newAction  the new callback function
newContext  the new toUnicode callback context pointer
oldAction  fillin: returns the old callback function pointer
oldContext  fillin: returns the old callback's private void* context
err  The error code status
See also:
ucnv_getToUCallBack
Stable:
ICU 2.0

int32_t ucnv_toUChars UConverter   cnv,
UChar *    dest,
int32_t    destCapacity,
const char *    src,
int32_t    srcLength,
UErrorCode   pErrorCode
 

Convert the codepage string into a Unicode string using an existing UConverter.

The output string is NUL-terminated if possible.

This function is a more convenient but less powerful version of ucnv_toUnicode(). It is only useful for whole strings, not for streaming conversion.

The maximum output buffer capacity required (barring output from callbacks) will be 2*srcLength (each char may be converted into a surrogate pair).

Parameters:
cnv  the converter object to be used (ucnv_resetToUnicode() will be called)
src  the input codepage string
srcLength  the input string length, or -1 if NUL-terminated
dest  destination string buffer, can be NULL if destCapacity==0
destCapacity  the number of UChars available at dest
pErrorCode  normal ICU error code; common error codes that may be set by this function include U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
Returns:
the length of the output string, not counting the terminating NUL; if the length is greater than destCapacity, then the string will not fit and a buffer of the indicated length would need to be passed in
See also:
ucnv_toUnicode , ucnv_convert
Stable:
ICU 2.0

void ucnv_toUnicode UConverter   converter,
UChar **    target,
const UChar *    targetLimit,
const char **    source,
const char *    sourceLimit,
int32_t   offsets,
UBool    flush,
UErrorCode   err
 

Converts a buffer of codepage bytes into an array of unicode UChars characters.

This function is optimized for converting a continuous stream of data in buffer-sized chunks, where the entire source and target does not fit in available buffers.

The source pointer is an in/out parameter. It starts out pointing where the conversion is to begin, and ends up pointing after the last byte of source consumed.

Target similarly starts out pointing at the first available UChar in the output buffer, and ends up pointing after the last UChar written to the output. It does NOT necessarily keep UChar sequences together.

The converter always attempts to consume the entire source buffer, unless (1.) the target buffer is full, or (2.) a failing error is returned from the current callback function. When a successful error status has been returned, it means that all of the source buffer has been consumed. At that point, the caller should reset the source and sourceLimit pointers to point to the next chunk.

This is a stateful conversion. Additionally, even when all source data has been consumed, some data may be in the converters' internal state. Call this function repeatedly, updating the target pointers with the next empty chunk of target in case of a U_BUFFER_OVERFLOW_ERROR, and updating the source pointers with the next chunk of source when a successful error status is returned, until there are no more chunks of source data.

Parameters:
converter  the Unicode converter
target  I/O parameter. Input : Points to the beginning of the buffer to copy UChars into. Output : points to after the last UChar copied.
targetLimit  the pointer just after the end of the target buffer
source  I/O parameter, pointer to pointer to the source codepage buffer.
sourceLimit  the pointer to the byte after the end of the source buffer
offsets  if NULL is passed, nothing will happen to it, otherwise it needs to have the same number of allocated cells as target. Will fill in offsets from target to source pointer, e.g.: if offsets[3] is equal to 6, it means that target[3] was a result of transcoding source[6]. For output data carried across calls, and other data without a specific source character (such as from escape sequences or callbacks), -1 will be placed for offsets.
flush  set to TRUE if the current source buffer is the last available chunk of the source, FALSE otherwise. Note that if a failing status is returned, this function may have to be called multiple times with flush set to TRUE until the source buffer is consumed.
err  the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the converter is NULL. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still data to be written to the target.
See also:
ucnv_fromUChars , ucnv_convert , ucnv_getMinCharSize , ucnv_setFromUCallBack , ucnv_getNextUChar
Stable:
ICU 2.0

UBool ucnv_usesFallback const UConverter   cnv
 

Determines if the converter uses fallback mappings or not.

Parameters:
cnv  The converter to be tested
Returns:
TRUE if the converter uses fallback, FALSE otherwise.
Stable:
ICU 2.0

typedef void 1 *    UConverterFromUCallback const
 

Function pointer for error callback in the codepage to unicode direction.

Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).

Parameters:
context  Pointer to the callback's private data
args  Information about the conversion in progress
codePoints  Points to 'length' bytes of the concerned codepage sequence
length  Size (in bytes) of the concerned codepage sequence
reason  Defines the reason the callback was invoked
See also:
ucnv_setToUCallBack , UConverterToUnicodeArgs
Stable:
ICU 2.0


Generated on Wed Dec 18 16:50:26 2002 for ICU 2.4 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001