ICU 65.1  65.1
ubrk.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 ******************************************************************************
8 */
9 
10 #ifndef UBRK_H
11 #define UBRK_H
12 
13 #include "unicode/utypes.h"
14 #include "unicode/uloc.h"
15 #include "unicode/utext.h"
16 #include "unicode/localpointer.h"
17 
22 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
23 # define UBRK_TYPEDEF_UBREAK_ITERATOR
24 
29 #endif
30 
31 #if !UCONFIG_NO_BREAK_ITERATION
32 
33 #include "unicode/parseerr.h"
34 
99 typedef enum UBreakIteratorType {
108 
109 #ifndef U_HIDE_DEPRECATED_API
110 
124 #endif // U_HIDE_DEPRECATED_API
126 
130 #define UBRK_DONE ((int32_t) -1)
131 
132 
144 typedef enum UWordBreak {
167 } UWordBreak;
168 
180 typedef enum ULineBreakTag {
190 } ULineBreakTag;
191 
192 
193 
205 typedef enum USentenceBreakTag {
222 
223 
241 U_STABLE UBreakIterator* U_EXPORT2
243  const char *locale,
244  const UChar *text,
245  int32_t textLength,
246  UErrorCode *status);
247 
263 U_STABLE UBreakIterator* U_EXPORT2
264 ubrk_openRules(const UChar *rules,
265  int32_t rulesLength,
266  const UChar *text,
267  int32_t textLength,
268  UParseError *parseErr,
269  UErrorCode *status);
270 
291 U_STABLE UBreakIterator* U_EXPORT2
292 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
293  const UChar * text, int32_t textLength,
294  UErrorCode * status);
295 
314 U_STABLE UBreakIterator * U_EXPORT2
316  const UBreakIterator *bi,
317  void *stackBuffer,
318  int32_t *pBufferSize,
319  UErrorCode *status);
320 
321 #ifndef U_HIDE_DEPRECATED_API
322 
327 #define U_BRK_SAFECLONE_BUFFERSIZE 1
328 
329 #endif /* U_HIDE_DEPRECATED_API */
330 
337 U_STABLE void U_EXPORT2
339 
340 #if U_SHOW_CPLUSPLUS_API
341 
342 U_NAMESPACE_BEGIN
343 
354 
355 U_NAMESPACE_END
356 
357 #endif
358 
371 U_STABLE void U_EXPORT2
373  const UChar* text,
374  int32_t textLength,
375  UErrorCode* status);
376 
377 
395 U_STABLE void U_EXPORT2
397  UText* text,
398  UErrorCode* status);
399 
400 
401 
410 U_STABLE int32_t U_EXPORT2
411 ubrk_current(const UBreakIterator *bi);
412 
422 U_STABLE int32_t U_EXPORT2
424 
434 U_STABLE int32_t U_EXPORT2
436 
444 U_STABLE int32_t U_EXPORT2
446 
456 U_STABLE int32_t U_EXPORT2
458 
468 U_STABLE int32_t U_EXPORT2
470  int32_t offset);
471 
481 U_STABLE int32_t U_EXPORT2
483  int32_t offset);
484 
494 U_STABLE const char* U_EXPORT2
495 ubrk_getAvailable(int32_t index);
496 
505 U_STABLE int32_t U_EXPORT2
506 ubrk_countAvailable(void);
507 
508 
518 U_STABLE UBool U_EXPORT2
519 ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
520 
530 U_STABLE int32_t U_EXPORT2
532 
550 U_STABLE int32_t U_EXPORT2
551 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
552 
562 U_STABLE const char* U_EXPORT2
564 
590 U_STABLE void U_EXPORT2
592  UText *text,
593  UErrorCode *status);
594 
595 
621 U_STABLE int32_t U_EXPORT2
623  uint8_t * binaryRules, int32_t rulesCapacity,
624  UErrorCode * status);
625 
626 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
627 
628 #endif
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:28
Tag value for sentences that do not contain an ending sentence terminator ('. ...
Definition: ubrk.h:217
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
Tag value for words containing kana characters, upper limit.
Definition: ubrk.h:162
Tag value for words containing ideographic characters, lower limit.
Definition: ubrk.h:164
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
Tag value for words containing letters, upper limit.
Definition: ubrk.h:158
int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
Upper bound for hard line breaks.
Definition: ubrk.h:189
UBreakIterator * ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
Tag value for words containing kana characters, lower limit.
Definition: ubrk.h:160
Character breaks.
Definition: ubrk.h:101
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
Definition: ubrk.h:180
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
Definition: ubrk.h:144
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Definition: ubrk.h:205
Tag value for a hard, or mandatory line break.
Definition: ubrk.h:187
Upper bound for tags for sentences ended by sentence terminators.
Definition: ubrk.h:212
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:99
Tag value for sentences ending with a sentence terminator ('.
Definition: ubrk.h:210
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
Word breaks.
Definition: ubrk.h:103
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position...
int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to zero, the start of the text being scanned.
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
C API: Abstract Unicode Text API.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
Upper bound for tags for uncategorized words.
Definition: ubrk.h:149
One more than the highest normal UBreakIteratorType value.
Definition: ubrk.h:123
Tag value for words that appear to be numbers, upper limit.
Definition: ubrk.h:153
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
Tag value for soft line breaks, positions at which a line break is acceptable but not required...
Definition: ubrk.h:183
"Smart pointer" class, closes a UBreakIterator via ubrk_close().
Upper bound for tags for sentences ended by a separator.
Definition: ubrk.h:219
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanne...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position...
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
C API: Parse Error Information.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
Tag value for "words" that do not fit into any of the other categories.
Definition: ubrk.h:147
int32_t ubrk_getBinaryRules(UBreakIterator *bi, uint8_t *binaryRules, int32_t rulesCapacity, UErrorCode *status)
Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
ULocDataLocaleType
Constants for *_getLocale(). Allows the user to select whether she wants information on requested...
Definition: uloc.h:338
Line breaks.
Definition: ubrk.h:105
Sentence breaks.
Definition: ubrk.h:107
UText struct.
Definition: utext.h:1347
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters...
Definition: ubrk.h:156
Tag value for words that appear to be numbers, lower limit.
Definition: ubrk.h:151
Upper bound for soft line breaks.
Definition: ubrk.h:185
Title Case breaks. The iterator created using this type locates title boundaries as described for Unic...
Definition: ubrk.h:118
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
Tag value for words containing ideographic characters, upper limit.
Definition: ubrk.h:166
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
C API: Locale.