22 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 23 # define UBRK_TYPEDEF_UBREAK_ITERATOR 31 #if !UCONFIG_NO_BREAK_ITERATION 109 #ifndef U_HIDE_DEPRECATED_API 124 #endif // U_HIDE_DEPRECATED_API 130 #define UBRK_DONE ((int32_t) -1) 293 const UChar * text, int32_t textLength,
318 int32_t *pBufferSize,
321 #ifndef U_HIDE_DEPRECATED_API 327 #define U_BRK_SAFECLONE_BUFFERSIZE 1 340 #if U_SHOW_CPLUSPLUS_API 623 uint8_t * binaryRules, int32_t rulesCapacity,
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Tag value for sentences that do not contain an ending sentence terminator ('. ...
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
Tag value for words containing kana characters, upper limit.
Tag value for words containing ideographic characters, lower limit.
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
Tag value for words containing letters, upper limit.
int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
Upper bound for hard line breaks.
UBreakIterator * ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
Tag value for words containing kana characters, lower limit.
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Tag value for a hard, or mandatory line break.
Upper bound for tags for sentences ended by sentence terminators.
UBreakIteratorType
The possible types of text boundaries.
Tag value for sentences ending with a sentence terminator ('.
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position...
int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to zero, the start of the text being scanned.
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
C API: Abstract Unicode Text API.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
Upper bound for tags for uncategorized words.
One more than the highest normal UBreakIteratorType value.
Tag value for words that appear to be numbers, upper limit.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
Tag value for soft line breaks, positions at which a line break is acceptable but not required...
"Smart pointer" class, closes a UBreakIterator via ubrk_close().
Upper bound for tags for sentences ended by a separator.
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanne...
uint16_t UChar
The base type for UTF-16 code units and pointers.
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position...
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
C API: Parse Error Information.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
Tag value for "words" that do not fit into any of the other categories.
int32_t ubrk_getBinaryRules(UBreakIterator *bi, uint8_t *binaryRules, int32_t rulesCapacity, UErrorCode *status)
Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested...
A UParseError struct is used to return detailed information about parsing errors.
Basic definitions for ICU, for both C and C++ APIs.
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters...
Tag value for words that appear to be numbers, lower limit.
Upper bound for soft line breaks.
Title Case breaks. The iterator created using this type locates title boundaries as described for Unic...
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Tag value for words containing ideographic characters, upper limit.
int8_t UBool
The ICU boolean type.