ICU 65.1
65.1
|
This file defines an immutable Unicode code point trie. More...
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/ucpmap.h"
#include "unicode/utf8.h"
Go to the source code of this file.
Data Structures | |
struct | UCPTrie |
Immutable Unicode code point trie structure. More... | |
Namespaces | |
icu | |
File coll.h. | |
Macros | |
#define | UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) |
Macro parameter value for a trie with 16-bit data values. More... | |
#define | UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) |
Macro parameter value for a trie with 32-bit data values. More... | |
#define | UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) |
Macro parameter value for a trie with 8-bit data values. More... | |
#define | UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) |
Returns a trie value for a code point, with range checking. More... | |
#define | UCPTRIE_SMALL_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) |
Returns a trie value for a code point, with range checking. More... | |
#define | UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) |
UTF-16: Reads the next code point (UChar32 c, out), post-increments src, and gets a value from the trie. More... | |
#define | UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) |
UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, and gets a value from the trie. More... | |
#define | UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) |
UTF-8: Post-increments src and gets a value from the trie. More... | |
#define | UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) |
UTF-8: Pre-decrements src and gets a value from the trie. More... | |
#define | UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) |
Returns a trie value for an ASCII code point, without range checking. More... | |
#define | UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) |
Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. More... | |
#define | UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) |
Returns a trie value for a supplementary code point (U+10000..U+10FFFF), without range checking. More... | |
Enumerations | |
enum | UCPTrieType { UCPTRIE_TYPE_ANY = -1, UCPTRIE_TYPE_FAST, UCPTRIE_TYPE_SMALL } |
Selectors for the type of a UCPTrie. More... | |
enum | UCPTrieValueWidth { UCPTRIE_VALUE_BITS_ANY = -1, UCPTRIE_VALUE_BITS_16, UCPTRIE_VALUE_BITS_32, UCPTRIE_VALUE_BITS_8 } |
Selectors for the number of bits in a UCPTrie data value. More... | |
Functions | |
U_CAPI UCPTrie * | ucptrie_openFromBinary (UCPTrieType type, UCPTrieValueWidth valueWidth, const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode) |
Opens a trie from its binary form, stored in 32-bit-aligned memory. More... | |
U_CAPI void | ucptrie_close (UCPTrie *trie) |
Closes a trie and releases associated memory. More... | |
U_CAPI UCPTrieType | ucptrie_getType (const UCPTrie *trie) |
Returns the trie type. More... | |
U_CAPI UCPTrieValueWidth | ucptrie_getValueWidth (const UCPTrie *trie) |
Returns the number of bits in a trie data value. More... | |
U_CAPI uint32_t | ucptrie_get (const UCPTrie *trie, UChar32 c) |
Returns the value for a code point as stored in the trie, with range checking. More... | |
U_CAPI UChar32 | ucptrie_getRange (const UCPTrie *trie, UChar32 start, UCPMapRangeOption option, uint32_t surrogateValue, UCPMapValueFilter *filter, const void *context, uint32_t *pValue) |
Returns the last code point such that all those from start to there have the same value. More... | |
U_CAPI int32_t | ucptrie_toBinary (const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode) |
Writes a memory-mappable form of the trie into 32-bit aligned memory. More... | |
This file defines an immutable Unicode code point trie.
Definition in file ucptrie.h.
#define UCPTRIE_16 | ( | trie, | |
i | |||
) | ((trie)->data.ptr16[i]) |
Macro parameter value for a trie with 16-bit data values.
Use the name of this macro as a "dataAccess" parameter in other macros. Do not use this macro in any other way.
#define UCPTRIE_32 | ( | trie, | |
i | |||
) | ((trie)->data.ptr32[i]) |
Macro parameter value for a trie with 32-bit data values.
Use the name of this macro as a "dataAccess" parameter in other macros. Do not use this macro in any other way.
#define UCPTRIE_8 | ( | trie, | |
i | |||
) | ((trie)->data.ptr8[i]) |
Macro parameter value for a trie with 8-bit data values.
Use the name of this macro as a "dataAccess" parameter in other macros. Do not use this macro in any other way.
#define UCPTRIE_ASCII_GET | ( | trie, | |
dataAccess, | |||
c | |||
) | dataAccess(trie, c) |
Returns a trie value for an ASCII code point, without range checking.
trie | (const UCPTrie *, in) the trie (of either fast or small type) |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
c | (UChar32, in) the input code point; must be U+0000..U+007F |
#define UCPTRIE_FAST_BMP_GET | ( | trie, | |
dataAccess, | |||
c | |||
) | dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) |
Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
Can be used to look up a value for a UTF-16 code unit if other parts of the string processing check for surrogates.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
c | (UChar32, in) the input code point, must be U+0000..U+FFFF |
#define UCPTRIE_FAST_GET | ( | trie, | |
dataAccess, | |||
c | |||
) | dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) |
Returns a trie value for a code point, with range checking.
Returns the trie error value if c is not in the range U+0000..U+10FFFF.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
c | (UChar32, in) the input code point |
#define UCPTRIE_FAST_SUPP_GET | ( | trie, | |
dataAccess, | |||
c | |||
) | dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) |
Returns a trie value for a supplementary code point (U+10000..U+10FFFF), without range checking.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
c | (UChar32, in) the input code point, must be U+10000..U+10FFFF |
#define UCPTRIE_FAST_U16_NEXT | ( | trie, | |
dataAccess, | |||
src, | |||
limit, | |||
c, | |||
result | |||
) |
UTF-16: Reads the next code point (UChar32 c, out), post-increments src, and gets a value from the trie.
Sets the trie error value if c is an unpaired surrogate.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
src | (const UChar *, in/out) the source text pointer |
limit | (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated |
c | (UChar32, out) variable for the code point |
result | (out) variable for the trie lookup result |
#define UCPTRIE_FAST_U16_PREV | ( | trie, | |
dataAccess, | |||
start, | |||
src, | |||
c, | |||
result | |||
) |
UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, and gets a value from the trie.
Sets the trie error value if c is an unpaired surrogate.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
start | (const UChar *, in) the start pointer for the text |
src | (const UChar *, in/out) the source text pointer |
c | (UChar32, out) variable for the code point |
result | (out) variable for the trie lookup result |
#define UCPTRIE_FAST_U8_NEXT | ( | trie, | |
dataAccess, | |||
src, | |||
limit, | |||
result | |||
) |
UTF-8: Post-increments src and gets a value from the trie.
Sets the trie error value for an ill-formed byte sequence.
Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point because it would be more work to do so and is often not needed. If the trie value differs from the error value, then the byte sequence is well-formed, and the code point can be assembled without revalidation.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
src | (const char *, in/out) the source text pointer |
limit | (const char *, in) the limit pointer for the text (must not be NULL) |
result | (out) variable for the trie lookup result |
#define UCPTRIE_FAST_U8_PREV | ( | trie, | |
dataAccess, | |||
start, | |||
src, | |||
result | |||
) |
UTF-8: Pre-decrements src and gets a value from the trie.
Sets the trie error value for an ill-formed byte sequence.
Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point because it would be more work to do so and is often not needed. If the trie value differs from the error value, then the byte sequence is well-formed, and the code point can be assembled without revalidation.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
start | (const char *, in) the start pointer for the text |
src | (const char *, in/out) the source text pointer |
result | (out) variable for the trie lookup result |
#define UCPTRIE_SMALL_GET | ( | trie, | |
dataAccess, | |||
c | |||
) | dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) |
Returns a trie value for a code point, with range checking. The width of the returned value is determined by the dataAccess parameter.
Returns the trie error value if c is not in the range U+0000..U+10FFFF.
trie | (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL |
dataAccess | UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width |
c | (UChar32, in) the input code point |
enum UCPTrieType |
Selectors for the type of a UCPTrie.
Different trade-offs for size vs. speed.
Enumerator | |
---|---|
UCPTRIE_TYPE_ANY | For ucptrie_openFromBinary() to accept any type. ucptrie_getType() will return the actual type.
|
UCPTRIE_TYPE_FAST | Fast/simple/larger BMP data structure. Use functions and "fast" macros.
|
UCPTRIE_TYPE_SMALL | Small/slower BMP data structure. Use functions and "small" macros.
|
enum UCPTrieValueWidth |
Selectors for the number of bits in a UCPTrie data value.
Enumerator | |
---|---|
UCPTRIE_VALUE_BITS_ANY | For ucptrie_openFromBinary() to accept any data value width. ucptrie_getValueWidth() will return the actual data value width.
|
UCPTRIE_VALUE_BITS_16 | The trie stores 16 bits per data value. It returns them as unsigned values 0..0xffff=65535.
|
UCPTRIE_VALUE_BITS_32 | The trie stores 32 bits per data value.
|
UCPTRIE_VALUE_BITS_8 | The trie stores 8 bits per data value. It returns them as unsigned values 0..0xff=255.
|
U_CAPI uint32_t ucptrie_get | ( | const UCPTrie * | trie, |
UChar32 | c | ||
) |
Returns the value for a code point as stored in the trie, with range checking.
Returns the trie error value if c is not in the range U+0000..U+10FFFF.
Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. Easier to use because, unlike the macros, this function works on all UCPTrie objects, for all types and value widths.
trie | the trie |
c | the code point |
U_CAPI UChar32 ucptrie_getRange | ( | const UCPTrie * | trie, |
UChar32 | start, | ||
UCPMapRangeOption | option, | ||
uint32_t | surrogateValue, | ||
UCPMapValueFilter * | filter, | ||
const void * | context, | ||
uint32_t * | pValue | ||
) |
Returns the last code point such that all those from start to there have the same value.
Can be used to efficiently iterate over all same-value ranges in a trie. (This is normally faster than iterating over code points and get()ting each value, but much slower than a data structure that stores ranges directly.)
If the UCPMapValueFilter function pointer is not NULL, then the value to be delivered is passed through that function, and the return value is the end of the range where all values are modified to the same actual value. The value is unchanged if that function pointer is NULL.
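Example:
UChar32 start = 0, end;
uint32_t value;
while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, NULL, NULL, &value)) >= 0) {
    // Work with the range start..end and its value.
    start = end + 1;
}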
trie | the trie |
start | range start |
option | defines whether surrogates are treated normally, or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL |
surrogateValue | value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL |
filter | a pointer to a function that may modify the trie data value, or NULL if the values from the trie are to be used unmodified |
context | an opaque pointer that is passed on to the filter function |
pValue | if not NULL, receives the value that every code point start..end has; may have been modified by filter(context, trie value) if that function pointer is not NULL |
U_CAPI UCPTrieType ucptrie_getType | ( | const UCPTrie * | trie | ) |
Returns the trie type.
trie | the trie |
U_CAPI UCPTrieValueWidth ucptrie_getValueWidth | ( | const UCPTrie * | trie | ) |
Returns the number of bits in a trie data value.
trie | the trie |
U_CAPI UCPTrie* ucptrie_openFromBinary | ( | UCPTrieType | type, |
UCPTrieValueWidth | valueWidth, | ||
const void * | data, | ||
int32_t | length, | ||
int32_t * | pActualLength, | ||
UErrorCode * | pErrorCode | ||
) |
Opens a trie from its binary form, stored in 32-bit-aligned memory.
Inverse of ucptrie_toBinary().
The memory must remain valid and unchanged for as long as the trie is in use. You must call ucptrie_close() on the trie once you are done using it.
type | selects the trie type; results in a U_INVALID_FORMAT_ERROR if it does not match the binary data; use UCPTRIE_TYPE_ANY to accept any type |
valueWidth | selects the number of bits in a data value; results in a U_INVALID_FORMAT_ERROR if it does not match the binary data; use UCPTRIE_VALUE_BITS_ANY to accept any data value width |
data | a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie |
length | the number of bytes available at data; can be more than necessary |
pActualLength | receives the actual number of bytes at data taken up by the trie data; can be NULL |
pErrorCode | an in/out ICU UErrorCode |
U_CAPI int32_t ucptrie_toBinary | ( | const UCPTrie * | trie, |
void * | data, | ||
int32_t | capacity, | ||
UErrorCode * | pErrorCode | ||
) |
Writes a memory-mappable form of the trie into 32-bit aligned memory.
Inverse of ucptrie_openFromBinary().
trie | the trie |
data | a pointer to 32-bit-aligned memory to be filled with the trie data; can be NULL if capacity==0 |
capacity | the number of bytes available at data, or 0 for pure preflighting |
pErrorCode | an in/out ICU UErrorCode; U_BUFFER_OVERFLOW_ERROR if the capacity is too small |