#include "unicode/utypes.h"
#include "unicode/uchar.h"
Go to the source code of this file.
Defines | |
#define | UBIDI_DEFAULT_LTR 0xfe |
Paragraph level setting. More... | |
#define | UBIDI_DEFAULT_RTL 0xff |
Paragraph level setting. More... | |
#define | UBIDI_MAX_EXPLICIT_LEVEL 61 |
Maximum explicit embedding level. More... | |
#define | UBIDI_LEVEL_OVERRIDE 0x80 |
Bit flag for level input. More... | |
#define | UBIDI_KEEP_BASE_COMBINING 1 |
option bit for ubidi_writeReordered(): keep combining characters after their base characters in RTL runs. More... | |
#define | UBIDI_DO_MIRRORING 2 |
option bit for ubidi_writeReordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings. More... | |
#define | UBIDI_INSERT_LRM_FOR_NUMERIC 4 |
option bit for ubidi_writeReordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse BiDi" algorithm. More... | |
#define | UBIDI_REMOVE_BIDI_CONTROLS 8 |
option bit for ubidi_writeReordered(): remove BiDi control characters (this does not affect UBIDI_INSERT_LRM_FOR_NUMERIC). More... | |
#define | UBIDI_OUTPUT_REVERSE 16 |
option bit for ubidi_writeReordered(): write the output in reverse order. More... | |
Typedefs | |
typedef uint8_t | UBiDiLevel |
UBiDiLevel is the type of the level values in this BiDi implementation. More... | |
typedef enum UBiDiDirection | UBiDiDirection |
typedef struct UBiDi | UBiDi |
Enumerations | |
enum | UBiDiDirection { UBIDI_LTR, UBIDI_RTL, UBIDI_MIXED } |
UBiDiDirection values indicate the text direction. More... | |
Functions | |
U_CAPI UBiDi* U_EXPORT2 | ubidi_open (void) |
Allocate a UBiDi structure. More... | |
U_CAPI UBiDi* U_EXPORT2 | ubidi_openSized (UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode) |
Allocate a UBiDi structure with preallocated memory for internal structures. More... | |
U_CAPI void U_EXPORT2 | ubidi_close (UBiDi *pBiDi) |
ubidi_close() must be called to free the memory associated with a UBiDi object. More... | |
U_CAPI void U_EXPORT2 | ubidi_setInverse (UBiDi *pBiDi, UBool isInverse) |
Modify the operation of the BiDi algorithm such that it approximates an "inverse BiDi" algorithm. More... | |
U_CAPI UBool U_EXPORT2 | ubidi_isInverse (UBiDi *pBiDi) |
Is this BiDi object set to perform the inverse BiDi algorithm? More... | |
U_CAPI void U_EXPORT2 | ubidi_setPara (UBiDi *pBiDi, const UChar *text, UTextOffset length, UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, UErrorCode *pErrorCode) |
Perform the Unicode BiDi algorithm. More... | |
U_CAPI void U_EXPORT2 | ubidi_setLine (const UBiDi *pParaBiDi, UTextOffset start, UTextOffset limit, UBiDi *pLineBiDi, UErrorCode *pErrorCode) |
ubidi_setLine() sets a UBiDi to contain the reordering information, especially the resolved levels, for all the characters in a line of text. More... | |
U_CAPI UBiDiDirection U_EXPORT2 | ubidi_getDirection (const UBiDi *pBiDi) |
Get the directionality of the text. More... | |
U_CAPI const UChar* U_EXPORT2 | ubidi_getText (const UBiDi *pBiDi) |
Get the pointer to the text. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_getLength (const UBiDi *pBiDi) |
Get the length of the text. More... | |
U_CAPI UBiDiLevel U_EXPORT2 | ubidi_getParaLevel (const UBiDi *pBiDi) |
Get the paragraph level of the text. More... | |
U_CAPI UBiDiLevel U_EXPORT2 | ubidi_getLevelAt (const UBiDi *pBiDi, UTextOffset charIndex) |
Get the level for one character. More... | |
U_CAPI const UBiDiLevel* U_EXPORT2 | ubidi_getLevels (UBiDi *pBiDi, UErrorCode *pErrorCode) |
Get an array of levels for each character. More... | |
U_CAPI void U_EXPORT2 | ubidi_getLogicalRun (const UBiDi *pBiDi, UTextOffset logicalStart, UTextOffset *pLogicalLimit, UBiDiLevel *pLevel) |
Get a logical run. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_countRuns (UBiDi *pBiDi, UErrorCode *pErrorCode) |
Get the number of runs. More... | |
U_CAPI UBiDiDirection U_EXPORT2 | ubidi_getVisualRun (UBiDi *pBiDi, UTextOffset runIndex, UTextOffset *pLogicalStart, UTextOffset *pLength) |
Get one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_getVisualIndex (UBiDi *pBiDi, UTextOffset logicalIndex, UErrorCode *pErrorCode) |
Get the visual position from a logical text position. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_getLogicalIndex (UBiDi *pBiDi, UTextOffset visualIndex, UErrorCode *pErrorCode) |
Get the logical text position from a visual position. More... | |
U_CAPI void U_EXPORT2 | ubidi_getLogicalMap (UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode) |
Get a logical-to-visual index map (array) for the characters in the UBiDi (paragraph or line) object. More... | |
U_CAPI void U_EXPORT2 | ubidi_getVisualMap (UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode) |
Get a visual-to-logical index map (array) for the characters in the UBiDi (paragraph or line) object. More... | |
U_CAPI void U_EXPORT2 | ubidi_reorderLogical (const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) |
This is a convenience function that does not use a UBiDi object. More... | |
U_CAPI void U_EXPORT2 | ubidi_reorderVisual (const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) |
This is a convenience function that does not use a UBiDi object. More... | |
U_CAPI void U_EXPORT2 | ubidi_invertMap (const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length) |
Invert an index map. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_writeReordered (UBiDi *pBiDi, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) |
Take a UBiDi object containing the reordering information for one paragraph or line of text as set by ubidi_setPara() or ubidi_setLine() and write a reordered string to the destination buffer. More... | |
U_CAPI UTextOffset U_EXPORT2 | ubidi_writeReverse (const UChar *src, int32_t srcLength, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) |
Reverse a Right-To-Left run of Unicode text. More... |
This is an implementation of the Unicode Bidirectional algorithm. The algorithm is defined in Unicode Technical Report 9, version 5, and is also described in The Unicode Standard, Version 3.0.
In functions with an error code parameter, the pErrorCode pointer must be valid and the value that it points to must not indicate a failure before the function call. Otherwise, the function returns immediately. After the function call, the value indicates success or failure.
The "limit" of a sequence of characters is the position just after their last character, i.e., one more than that position.
Some of the API functions provide access to "runs". Such a "run" is defined as a sequence of characters that are at the same embedding level after performing the BIDI algorithm.
This is (hypothetical) sample code that illustrates how the ICU BiDi API could be used to render a paragraph of text. Rendering code depends highly on the graphics system, therefore this sample code must make a lot of assumptions, which may or may not match any existing graphics system's properties.
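The basic assumptions are:
- Rendering is done from left to right on a horizontal line.
- A run of single-style, unidirectional text can be rendered at once.
- Such a run of text is passed to the graphics system with characters (code units) in logical order.
- The line-breaking algorithm is very complicated and locale-dependent, and therefore its implementation is omitted from this sample code.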
#include "unicode/ubidi.h"

// Style bits; a run's style is a combination of these flags.
typedef enum {
    styleNormal=0, styleSelected=1,
    styleBold=2, styleItalics=4,
    styleSuper=8, styleSub=16
} Style;

// A style run ends just before text[limit]; it starts at the previous
// style run's limit (or at 0 for the first style run).
typedef struct { UTextOffset limit; Style style; } StyleRun;

int getTextWidth(const UChar *text, UTextOffset start, UTextOffset limit,
                 const StyleRun *styleRuns, int styleRunCount);

// set *pLimit and *pStyleRunLimit for a line
// from text[start] and from styleRuns[styleRunStart]
// using ubidi_getLogicalRun(para, ...)
void getLineBreak(const UChar *text, UTextOffset start, UTextOffset *pLimit,
                  UBiDi *para,
                  const StyleRun *styleRuns, int styleRunStart,
                  int *pStyleRunLimit,
                  int *pLineWidth);

// render runs on a line sequentially, always from left to right

// prepare rendering a new line
void startLine(UBiDiDirection textDirection, int lineWidth);

// render a run of text and advance to the right by the run width
// the text[start..limit-1] is always in logical order
void renderRun(const UChar *text, UTextOffset start, UTextOffset limit,
               UBiDiDirection textDirection, Style style);

// We could compute a cross-product
// from the style runs with the directional runs
// and then reorder it.
// Instead, here we iterate over each run type
// and render the intersections -
// with shortcuts in simple (and common) cases.
// renderParagraph() is the main function.

// render a directional run with
// (possibly) multiple style runs intersecting with it
//
// text[start..limit-1] is one unidirectional run; start, limit and the
// style run limits are all indexes into the same text.
void renderDirectionalRun(const UChar *text,
                          UTextOffset start, UTextOffset limit,
                          UBiDiDirection direction,
                          const StyleRun *styleRuns, int styleRunCount) {
    int i;

    // iterate over style runs
    if(direction==UBIDI_LTR) {
        // left-to-right: walk the style runs forward
        UTextOffset styleLimit;

        for(i=0; i<styleRunCount; ++i) {
            styleLimit=styleRuns[i].limit;
            if(start<styleLimit) {
                if(styleLimit>limit) { styleLimit=limit; }
                renderRun(text, start, styleLimit,
                          direction, styleRuns[i].style);
                if(styleLimit==limit) { break; }
                start=styleLimit;
            }
        }
    } else {
        // right-to-left: walk the style runs backward so that the
        // pieces are still rendered from left to right
        UTextOffset styleStart;

        for(i=styleRunCount-1; i>=0; --i) {
            if(i>0) {
                styleStart=styleRuns[i-1].limit;
            } else {
                styleStart=0;
            }
            if(limit>=styleStart) {
                if(styleStart<start) { styleStart=start; }
                renderRun(text, styleStart, limit,
                          direction, styleRuns[i].style);
                if(styleStart==start) { break; }
                limit=styleStart;
            }
        }
    }
}

// the line object represents text[start..limit-1]
//
// pErrorCode follows the usual convention: it must be valid and must not
// indicate a failure on entry; it reports a failure of ubidi_countRuns().
void renderLine(UBiDi *line, const UChar *text,
                UTextOffset start, UTextOffset limit,
                const StyleRun *styleRuns, int styleRunCount,
                UErrorCode *pErrorCode) {
    UBiDiDirection direction=ubidi_getDirection(line);
    if(direction!=UBIDI_MIXED) {
        // unidirectional
        if(styleRunCount<=1) {
            renderRun(text, start, limit, direction, styleRuns[0].style);
        } else {
            renderDirectionalRun(text, start, limit,
                                 direction, styleRuns, styleRunCount);
        }
    } else {
        // mixed-directional
        UTextOffset count, i, runStart, runLength;

        count=ubidi_countRuns(line, pErrorCode);
        if(U_SUCCESS(*pErrorCode)) {
            if(styleRunCount<=1) {
                Style style=styleRuns[0].style;

                // iterate over directional runs
                for(i=0; i<count; ++i) {
                    direction=ubidi_getVisualRun(line, i, &runStart, &runLength);
                    // run indexes are relative to the line object;
                    // text and styleRuns use paragraph indexes
                    runStart+=start;
                    renderRun(text, runStart, runStart+runLength,
                              direction, style);
                }
            } else {
                // iterate over both directional and style runs
                for(i=0; i<count; ++i) {
                    direction=ubidi_getVisualRun(line, i, &runStart, &runLength);
                    // run indexes are relative to the line object;
                    // text and styleRuns use paragraph indexes
                    runStart+=start;
                    renderDirectionalRun(text, runStart, runStart+runLength,
                                         direction, styleRuns, styleRunCount);
                }
            }
        }
    }
}

// Render one paragraph of text, breaking it into lines of at most
// lineWidth. styleRuns may be NULL (or styleRunCount<=0) for
// single-style text. Returns immediately if pErrorCode is NULL or
// already indicates a failure, or if length<=0.
void renderParagraph(const UChar *text, UTextOffset length,
                     UBiDiDirection textDirection,
                     const StyleRun *styleRuns, int styleRunCount,
                     int lineWidth,
                     UErrorCode *pErrorCode) {
    UBiDi *para;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
        return;
    }

    para=ubidi_openSized(length, 0, pErrorCode);
    if(para==NULL) { return; }

    ubidi_setPara(para, text, length,
                  textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
                  NULL, pErrorCode);
    if(U_SUCCESS(*pErrorCode)) {
        // bit 0 of the paragraph level is the paragraph direction
        UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
        StyleRun styleRun={ length, styleNormal };
        int width;

        if(styleRuns==NULL || styleRunCount<=0) {
            styleRunCount=1;
            styleRuns=&styleRun;
        }

        // assume styleRuns[styleRunCount-1].limit>=length

        width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
        if(width<=lineWidth) {
            // everything fits onto one line

            // prepare rendering a new line from either left or right
            startLine(paraLevel, width);

            renderLine(para, text, 0, length,
                       styleRuns, styleRunCount, pErrorCode);
        } else {
            UBiDi *line;

            // we need to render several lines
            line=ubidi_openSized(length, 0, pErrorCode);
            if(line!=NULL) {
                UTextOffset start=0, limit;
                int styleRunStart=0, styleRunLimit;

                for(;;) {
                    limit=length;
                    styleRunLimit=styleRunCount;
                    getLineBreak(text, start, &limit, para,
                                 styleRuns, styleRunStart, &styleRunLimit,
                                 &width);
                    ubidi_setLine(para, start, limit, line, pErrorCode);
                    if(U_FAILURE(*pErrorCode)) {
                        // nothing further can be rendered after a failure
                        break;
                    }

                    // prepare rendering a new line
                    // from either left or right
                    startLine(paraLevel, width);

                    renderLine(line, text, start, limit,
                               styleRuns+styleRunStart,
                               styleRunLimit-styleRunStart,
                               pErrorCode);
                    if(limit==length) { break; }
                    start=limit;
                    // the last style run of this line continues on the
                    // next line unless it ends exactly at the line break
                    styleRunStart=styleRunLimit-1;
                    if(start>=styleRuns[styleRunStart].limit) {
                        ++styleRunStart;
                    }
                }

                ubidi_close(line);
            }
        }
    }

    ubidi_close(para);
}
Definition in file ubidi.h.
|
Paragraph level setting. If there is no strong character, then set the paragraph level to 0 (left-to-right). |
|
Paragraph level setting. If there is no strong character, then set the paragraph level to 1 (right-to-left). |
|
option bit for ubidi_writeReordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings.
|
|
option bit for ubidi_writeReordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse BiDi" algorithm.
|
|
option bit for ubidi_writeReordered(): keep combining characters after their base characters in RTL runs.
|
|
Bit flag for level input. Overrides directional properties. |
|
Maximum explicit embedding level.
(The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1.) |
|
option bit for ubidi_writeReordered(): write the output in reverse order.
This has the same effect as calling
|
|
option bit for ubidi_writeReordered(): remove BiDi control characters (this does not affect UBIDI_INSERT_LRM_FOR_NUMERIC).
|
|
|
|
|
|
UBiDiLevel is the type of the level values in this BiDi implementation. It holds an embedding level and indicates the visual direction by its bit 0 (even/odd value).
It can also hold non-level values for the
|
|
|
|
.
Important: If a
Referenced by BiDi::~BiDi(). |
|
Get the number of runs.
This function may invoke the actual reordering on the
Referenced by BiDi::countRuns(). |
|
Get the directionality of the text.
Referenced by BiDi::getDirection(). |
|
Get the length of the text.
Referenced by BiDi::getLength(). |
|
Get the level for one character.
Referenced by BiDi::getLevelAt(). |
|
Get an array of levels for each character.
Note that this function may allocate memory under some circumstances, unlike
Referenced by BiDi::getLevels(). |
|
Get the logical text position from a visual position.
If such a mapping is used many times on the same
This is the inverse function to
Referenced by BiDi::getLogicalIndex(). |
|
Get a logical-to-visual index map (array) for the characters in the UBiDi (paragraph or line) object.
The index map will result in indexMap[logicalIndex]==visualIndex.
Referenced by BiDi::getLogicalMap(). |
|
Get a logical run. This function returns information about a run and is used to retrieve runs in logical order. This is especially useful for line-breaking on a paragraph.
Referenced by BiDi::getLogicalRun(). |
|
Get the paragraph level of the text.
Referenced by BiDi::getParaLevel(). |
|
Get the pointer to the text.
Referenced by BiDi::getText(). |
|
Get the visual position from a logical text position.
If such a mapping is used many times on the same
Note that in right-to-left runs, this mapping places modifier letters before base characters and second surrogates before first ones.
Referenced by BiDi::getVisualIndex(). |
|
Get a visual-to-logical index map (array) for the characters in the UBiDi (paragraph or line) object.
The index map will result in indexMap[visualIndex]==logicalIndex.
Referenced by BiDi::getVisualMap(). |
|
Get one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL. In an RTL run, the character at the logical start is visually on the right of the displayed run. The length is the number of characters in the run.
Note that in right-to-left runs, code like this places modifier letters before base characters and second surrogates before first ones. Referenced by BiDi::getVisualRun(). |
|
Invert an index map. The one-to-one index mapping of the first map is inverted and written to the second one.
Referenced by BiDi::invertMap(). |
|
Is this BiDi object set to perform the inverse BiDi algorithm?
Referenced by BiDi::isInverse(). |
|
Allocate a UBiDi structure.
Such an object is initially empty. It is assigned the BiDi properties of a paragraph by
This object can be reused for as long as it is not deallocated by calling
Referenced by BiDi::BiDi(). |
|
Allocate a UBiDi structure with preallocated memory for internal structures.
This function provides a UBiDi object like ubidi_open(), but it also preallocates memory for internal structures. Subsequent functions will not allocate any more memory, and are thus guaranteed not to fail because of lack of memory.
The preallocation can be limited to some of the internal memory by setting some values to 0 here. That means that if, e.g.,
The number of runs depends on the actual text and may be anywhere between 1 and maxLength.
Referenced by BiDi::BiDi(). |
|
This is a convenience function that does not use a UBiDi object.
It is intended to be used when an application has determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using ubidi_getLogicalMap() on a UBiDi object.
The index map will result in indexMap[logicalIndex]==visualIndex. Referenced by BiDi::reorderLogical(). |
|
This is a convenience function that does not use a UBiDi object.
It is intended to be used when an application has determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using ubidi_getVisualMap() on a UBiDi object.
The index map will result in indexMap[visualIndex]==logicalIndex. Referenced by BiDi::reorderVisual(). |
|
Modify the operation of the BiDi algorithm such that it approximates an "inverse BiDi" algorithm.
This function must be called before
The normal operation of the BiDi algorithm as described in the Unicode Technical Report is to take text stored in logical (keyboard, typing) order and to determine the reordering of it for visual rendering. Some legacy codepages store text in visual order, and for operations with standard, Unicode-based algorithms, the text needs to be transformed to logical order. This is effectively the inverse algorithm of the described BiDi algorithm. Note that there is no standard algorithm for this "inverse BiDi" and that the current implementation provides only an approximation of "inverse BiDi".
With
Output runs should be retrieved using
Referenced by BiDi::setInverse(). |
|
This line of text is specified by referring to a
In the new line object, the indexes will range from 0 to
This is used after calling
After line-breaking, rules (L1) and (L2) for the treatment of trailing WS and for reordering are performed on a
Important:
The text pointer that was stored in
Referenced by BiDi::setLine(). |
|
Perform the Unicode BiDi algorithm. It is defined in Unicode Technical Report 9, version 5, and is also described in The Unicode Standard, Version 3.0.
This function takes a single plain text paragraph with or without externally specified embedding levels from "styled" text and computes the left-right-directionality of each character.
If the entire paragraph consists of text of only one direction, then the function may not perform all the steps described by the algorithm, i.e., some levels may not be the same as if all steps were performed. This is not relevant for unidirectional text.
The text must be externally split into separate paragraphs (rule P1). Paragraph separators (B) should appear at most at the very end.
Except for that bit, it must be
Caution: A copy of this pointer, not of the levels, will be stored in the
After the
The
Referenced by BiDi::setPara(). |
|
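Take a UBiDi object containing the reordering information for one paragraph or line of text as set by ubidi_setPara() or ubidi_setLine() and write a reordered string to the destination buffer.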
This function preserves the integrity of characters with multiple code units and (optionally) modifier letters. Characters in RTL runs can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove BiDi control characters; see the description of the
The text was aliased (only the pointer was stored without copying the contents) and must not have been modified since the
Referenced by BiDi::writeReordered(). |
|
Reverse a Right-To-Left run of Unicode text. This function preserves the integrity of characters with multiple code units and (optionally) modifier letters. Characters can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove BiDi control characters.
This function is the implementation for reversing RTL runs as part of
Referenced by BiDi::writeReverse(). |