ICU 66.0.1  66.0.1
utext.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: utext.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004oct06
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UTEXT_H__
20 #define __UTEXT_H__
21 
140 #include "unicode/utypes.h"
141 #include "unicode/uchar.h"
142 #if U_SHOW_CPLUSPLUS_API
143 #include "unicode/localpointer.h"
144 #include "unicode/rep.h"
145 #include "unicode/unistr.h"
146 #include "unicode/chariter.h"
147 #endif
148 
149 
151 
152 struct UText;
153 typedef struct UText UText;
156 /***************************************************************************************
157  *
158  * C Functions for creating UText wrappers around various kinds of text strings.
159  *
160  ****************************************************************************************/
161 
162 
/* Close function for UText instances.
 * Takes the UText to close and returns a UText pointer. */
183 U_STABLE UText * U_EXPORT2
184 utext_close(UText *ut);
185 
186 #if U_SHOW_CPLUSPLUS_API
187 
188 U_NAMESPACE_BEGIN
189 
200 
201 U_NAMESPACE_END
202 
203 #endif
204 
/* Open a read-only UText implementation for UTF-8 strings.
 * s/length give the string, status receives the ICU error code;
 * a UText pointer is returned. */
226 U_STABLE UText * U_EXPORT2
227 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
228 
229 
/* Open a read-only UText for a UChar * (UTF-16 code unit) string.
 * s/length give the string, status receives the ICU error code;
 * a UText pointer is returned. */
244 U_STABLE UText * U_EXPORT2
245 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
246 
247 
248 #if U_SHOW_CPLUSPLUS_API
249 
261 U_STABLE UText * U_EXPORT2
263 
264 
277 U_STABLE UText * U_EXPORT2
279 
280 
293 U_STABLE UText * U_EXPORT2
295 
308 U_STABLE UText * U_EXPORT2
310 
311 #endif
312 
313 
/* Clone a UText.
 * Takes a destination UText, the source UText, the deep and readOnly
 * flags, and an ICU error-code out-parameter; returns a UText pointer.
 * Note: the provider-level UTextClone function type has no readOnly
 * parameter. */
371 U_STABLE UText * U_EXPORT2
372 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
373 
374 
/* Compare two UText objects for equality. Neither argument is modified
 * (both are const). */
386 U_STABLE UBool U_EXPORT2
387 utext_equals(const UText *a, const UText *b);
388 
389 
390 /*****************************************************************************
391  *
392  * Functions to work with the text represented by a UText wrapper
393  *
394  *****************************************************************************/
395 
407 U_STABLE int64_t U_EXPORT2
409 
/* Return TRUE if calculating the length of the text could be expensive. */
423 U_STABLE UBool U_EXPORT2
424 utext_isLengthExpensive(const UText *ut);
425 
/* Returns the code point at the requested native index, or U_SENTINEL (-1)
 * if the index is out of bounds. */
451 U_STABLE UChar32 U_EXPORT2
452 utext_char32At(UText *ut, int64_t nativeIndex);
453 
454 
465 U_STABLE UChar32 U_EXPORT2
467 
468 
/* Get the code point at the current iteration position of the UText,
 * and advance the position.
 * The UTEXT_NEXT32 macro is the inline counterpart and falls back to this
 * function. */
487 U_STABLE UChar32 U_EXPORT2
488 utext_next32(UText *ut);
489 
490 
508 U_STABLE UChar32 U_EXPORT2
510 
511 
/* Set the iteration index to nativeIndex and return the code point at
 * that index. */
530 U_STABLE UChar32 U_EXPORT2
531 utext_next32From(UText *ut, int64_t nativeIndex);
532 
533 
534 
/* Set the iteration index, and return the code point preceding the one
 * specified by the initial index. */
550 U_STABLE UChar32 U_EXPORT2
551 utext_previous32From(UText *ut, int64_t nativeIndex);
552 
/* Get the current iterator position, which can range from 0 to the length
 * of the text.
 * UTEXT_GETNATIVEINDEX is the inline macro counterpart. */
565 U_STABLE int64_t U_EXPORT2
566 utext_getNativeIndex(const UText *ut);
567 
/* Set the current iteration position to the nearest code point boundary
 * at or preceding the specified index.
 * UTEXT_SETNATIVEINDEX is the inline macro counterpart and falls back to
 * this function. */
591 U_STABLE void U_EXPORT2
592 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
593 
/* Move the iterator position by delta code points; returns a UBool. */
610 U_STABLE UBool U_EXPORT2
611 utext_moveIndex32(UText *ut, int32_t delta);
612 
635 U_STABLE int64_t U_EXPORT2
637 
638 
/* Extract text from a UText into a UChar buffer.
 * nativeStart/nativeLimit select the range in native indexes,
 * dest/destCapacity describe the output buffer, and status receives the
 * ICU error code. Returns an int32_t. */
673 U_STABLE int32_t U_EXPORT2
674 utext_extract(UText *ut,
675  int64_t nativeStart, int64_t nativeLimit,
676  UChar *dest, int32_t destCapacity,
677  UErrorCode *status);
678 
679 
680 
681 /************************************************************************************
682  *
683  * #define inline versions of selected performance-critical text access functions
684  * Caution: do not use auto increment++ or decrement-- expressions
685  * as parameters to these macros.
686  *
687  * For most use, where there is no extreme performance constraint, the
688  * normal, non-inline functions are a better choice. The resulting code
689  * will be smaller, and, if the need ever arises, easier to debug.
690  *
691  * These are implemented as #defines rather than real functions
692  * because there is no fully portable way to do inline functions in plain C.
693  *
694  ************************************************************************************/
695 
696 #ifndef U_HIDE_INTERNAL_API
697 
/* Inline version of utext_current32().
 * Fast path: when chunkOffset lies inside the current chunk and the UTF-16
 * code unit there is below 0xd800 (below the surrogate range), that code
 * unit is the result and no function call is made. Otherwise the macro
 * defers to utext_current32(ut). The iteration position is not changed by
 * the fast path.
 * `ut` is evaluated more than once: do not pass expressions with side
 * effects. */
706 #define UTEXT_CURRENT32(ut) \
707  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
708  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
709 #endif /* U_HIDE_INTERNAL_API */
710 
/* Inline version of utext_next32().
 * Fast path: when chunkOffset lies inside the current chunk and the UTF-16
 * code unit there is below 0xd800 (below the surrogate range), the macro
 * yields that code unit and post-increments chunkOffset. Otherwise it
 * defers to utext_next32(ut).
 * `ut` is evaluated more than once: do not pass expressions with side
 * effects. */
722 #define UTEXT_NEXT32(ut) \
723  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
724  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
725 
/* Inline version of utext_previous32().
 * Fast path: when chunkOffset is greater than 0 and the UTF-16 code unit
 * just before it is below 0xd800 (below the surrogate range), the macro
 * pre-decrements chunkOffset and yields that code unit. Otherwise it
 * defers to utext_previous32(ut).
 * `ut` is evaluated more than once: do not pass expressions with side
 * effects. */
736 #define UTEXT_PREVIOUS32(ut) \
737  ((ut)->chunkOffset > 0 && \
738  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
739  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
740 
/* Inline version of utext_getNativeIndex().
 * While chunkOffset is at or below nativeIndexingLimit (the highest chunk
 * offset where native and UTF-16 chunk indexing correspond), the native
 * index is simply chunkNativeStart + chunkOffset. Beyond that limit the
 * provider's mapOffsetToNative function is called through pFuncs.
 * `ut` is evaluated more than once: do not pass expressions with side
 * effects. */
753 #define UTEXT_GETNATIVEINDEX(ut) \
754  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
755  (ut)->chunkNativeStart+(ut)->chunkOffset : \
756  (ut)->pFuncs->mapOffsetToNative(ut))
757 
/* Inline version of utext_setNativeIndex().
 * Computes the offset of native index `ix` relative to chunkNativeStart.
 * If that offset is non-negative, strictly below nativeIndexingLimit, and
 * the UTF-16 code unit at that offset is below 0xdc00 (the start of the
 * trail-surrogate range), chunkOffset is set directly. In every other case
 * the macro calls utext_setNativeIndex(ut, ix).
 * This is a statement macro (wrapped in UPRV_BLOCK_MACRO_BEGIN/END), not an
 * expression. Both `ut` and `ix` may be evaluated more than once: do not
 * pass expressions with side effects. */
769 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
770  int64_t __offset = (ix) - (ut)->chunkNativeStart; \
771  if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
772  (ut)->chunkOffset=(int32_t)__offset; \
773  } else { \
774  utext_setNativeIndex((ut), (ix)); \
775  } \
776 } UPRV_BLOCK_MACRO_END
777 
778 
779 
780 /************************************************************************************
781  *
782  * Functions related to writing or modifying the text.
783  * These will work only with modifiable UTexts. Attempting to
784  * modify a read-only UText will return an error status.
785  *
786  ************************************************************************************/
787 
788 
/* Return TRUE if the text can be written (modified) with utext_replace()
 * or utext_copy(). */
807 U_STABLE UBool U_EXPORT2
808 utext_isWritable(const UText *ut);
809 
810 
/* Test whether there is meta data associated with the text. */
819 U_STABLE UBool U_EXPORT2
820 utext_hasMetaData(const UText *ut);
821 
822 
/* Replace a range of the original text with a replacement text.
 * nativeStart/nativeLimit select the range in native indexes,
 * replacementText/replacementLength give the new UTF-16 text, and status
 * receives the ICU error code. Returns an int32_t.
 * Per the section header for these functions, attempting to modify a
 * read-only UText returns an error status. */
850 U_STABLE int32_t U_EXPORT2
851 utext_replace(UText *ut,
852  int64_t nativeStart, int64_t nativeLimit,
853  const UChar *replacementText, int32_t replacementLength,
854  UErrorCode *status);
855 
856 
857 
/* Copy or move a substring from one position to another within the text,
 * while retaining any metadata associated with it.
 * nativeStart/nativeLimit select the source range, destIndex is the
 * destination native index, move selects move versus copy, and status
 * receives the ICU error code.
 * Per the section header for these functions, attempting to modify a
 * read-only UText returns an error status. */
890 U_STABLE void U_EXPORT2
891 utext_copy(UText *ut,
892  int64_t nativeStart, int64_t nativeLimit,
893  int64_t destIndex,
894  UBool move,
895  UErrorCode *status);
896 
897 
/* Takes a UText and returns nothing.
 * NOTE(review): this listing carries no description for the function; the
 * name suggests it makes the UText non-modifiable -- confirm against the
 * ICU API documentation. */
919 U_STABLE void U_EXPORT2
920 utext_freeze(UText *ut);
921 
922 
/* Anonymous enum covering utext.h lines 929-963. Its enumerators are elided
 * from this listing; the cross-reference entries for lines 934, 941, 948,
 * 954 and 962 describe them as: length is potentially expensive to compute,
 * text chunks stay valid until the text is modified or deleted, the
 * provider supports replace()/copy(), meta data is associated with the
 * text, and the text provider owns the text storage. */
929 enum {
963 };
964 
/* Function type declaration for UText.clone(), the provider-side clone
 * entry stored in the UTextFuncs dispatch table. */
1002 typedef UText * U_CALLCONV
1003 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1004 
1005 
1014 typedef int64_t U_CALLCONV
1016 
/* Function type declaration for UText.access().
 * Takes the UText, a native index and a forward flag; returns a UBool. */
1042 typedef UBool U_CALLCONV
1043 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1044 
1072 typedef int32_t U_CALLCONV
1074  int64_t nativeStart, int64_t nativeLimit,
1075  UChar *dest, int32_t destCapacity,
1076  UErrorCode *status);
1077 
1107 typedef int32_t U_CALLCONV
1109  int64_t nativeStart, int64_t nativeLimit,
1110  const UChar *replacementText, int32_t replacmentLength,
1111  UErrorCode *status);
1112 
1141 typedef void U_CALLCONV
1143  int64_t nativeStart, int64_t nativeLimit,
1144  int64_t nativeDest,
1145  UBool move,
1146  UErrorCode *status);
1147 
1161 typedef int64_t U_CALLCONV
1163 
/* Function type declaration for the UText mapNativeIndexToUTF16 dispatch
 * entry. Takes a native index and returns an int32_t. */
1179 typedef int32_t U_CALLCONV
1180 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1181 
1182 
1200 typedef void U_CALLCONV
1202 
1203 
/* (public) Function dispatch table for UText.
 * Only tableSize is visible in this listing. Per the cross-reference
 * entries, the remaining members are declared on the elided lines:
 * reserved1..reserved3 (1235), clone (1244), nativeLength (1253),
 * access (1261), extract (1269), replace (1277), copy (1285),
 * mapOffsetToNative (1293), mapNativeIndexToUTF16 (1301), close (1309),
 * and the spare function pointers spare1..spare3 (1315, 1321, 1327). */
1213 struct UTextFuncs {
1228  int32_t tableSize; /* (public) Function table size, sizeof(UTextFuncs). */
1229 
1236 
1237 
1245 
1254 
1262 
1270 
1278 
1286 
1294 
1302 
1310 
1316 
1322 
1328 
1329 };
1334 typedef struct UTextFuncs UTextFuncs;
1335 
/* UText struct.
 * Several members are elided from this listing. Per the cross-reference
 * entries they are declared at: providerProperties (1376),
 * chunkNativeLimit (1394), nativeIndexingLimit (1409),
 * chunkNativeStart (1417), chunkContents (1441) and pFuncs (1447).
 * UTEXT_INITIALIZER supplies values for all members, in declaration
 * order. */
1347 struct UText {
1360  uint32_t magic; /* (private) Magic value; UTEXT_INITIALIZER sets it to UTEXT_MAGIC. */
1361 
1362 
1368  int32_t flags; /* (private) Flags for managing allocation and freeing of memory associated with this UText. */
1369 
1370 
1377 
1384  int32_t sizeOfStruct; /* (public) sizeof(UText); allows possible backward-compatible extension. */
1385 
1386  /* ------ 16 byte alignment boundary ----------- */
1387 
1388 
1395 
1400  int32_t extraSize; /* (protected) Size in bytes of the extra space (pExtra). */
1401 
1410 
1411  /* ---- 16 byte alignment boundary------ */
1412 
1418 
1424  int32_t chunkOffset; /* (protected) Current iteration position within the text chunk (UTF-16 buffer). */
1425 
1430  int32_t chunkLength; /* (protected) Length of the text chunk (UTF-16 buffer), in UChars. */
1431 
1432  /* ---- 16 byte alignment boundary-- */
1433 
1434 
1442 
1448 
1454  void *pExtra; /* (protected) Additional space requested by the text provider during the utext_open operation. */
1455 
1462  const void *context; /* (protected) Pointer to string or text-containing object or similar. */
1463 
1464  /* --- 16 byte alignment boundary--- */
1465 
1471  const void *p; /* (protected) Pointer field available for use by the text provider. */
1477  const void *q; /* (protected) Pointer field available for use by the text provider. */
1483  const void *r; /* (protected) Pointer field available for use by the text provider. */
1484 
1490  void *privP; /* Private field reserved for future use by the UText framework itself. */
1491 
1492 
1493  /* --- 16 byte alignment boundary--- */
1494 
1495 
1501  int64_t a; /* (protected) Integer field reserved for use by the text provider. */
1502 
1508  int32_t b; /* (protected) Integer field reserved for use by the text provider. */
1509 
1515  int32_t c; /* (protected) Integer field reserved for use by the text provider. */
1516 
1517  /* ---- 16 byte alignment boundary---- */
1518 
1519 
1525  int64_t privA; /* Private field reserved for future use by the UText framework itself. */
1531  int32_t privB; /* Private field reserved for future use by the UText framework itself. */
1537  int32_t privC; /* Private field reserved for future use by the UText framework itself. */
1538 };
1539 
1540 
/* Common function for use by Text Provider implementations to allocate
 * and/or initialize a new UText struct.
 * extraSpace is the amount of additional space requested, status receives
 * the ICU error code, and a UText pointer is returned.
 * NOTE(review): extraSpace presumably corresponds to the pExtra/extraSize
 * members and is in bytes -- confirm against the ICU API documentation. */
1557 U_STABLE UText * U_EXPORT2
1558 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1559 
1560 // do not use #ifndef U_HIDE_INTERNAL_API around the following!
/* Constant that UTEXT_INITIALIZER stores in the magic member of a UText. */
1566 enum {
1567  UTEXT_MAGIC = 0x345ad82c
1568 };
1569 
/* Aggregate initializer for a UText.
 * Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other
 * member is initialized to 0 or NULL. The values are positional, so their
 * order must match the member order of struct UText. */
1577 #define UTEXT_INITIALIZER { \
1578  UTEXT_MAGIC, /* magic */ \
1579  0, /* flags */ \
1580  0, /* providerProperties */ \
1581  sizeof(UText), /* sizeOfStruct */ \
1582  0, /* chunkNativeLimit */ \
1583  0, /* extraSize */ \
1584  0, /* nativeIndexingLimit */ \
1585  0, /* chunkNativeStart */ \
1586  0, /* chunkOffset */ \
1587  0, /* chunkLength */ \
1588  NULL, /* chunkContents */ \
1589  NULL, /* pFuncs */ \
1590  NULL, /* pExtra */ \
1591  NULL, /* context */ \
1592  NULL, NULL, NULL, /* p, q, r */ \
1593  NULL, /* privP */ \
1594  0, 0, 0, /* a, b, c */ \
1595  0, 0, 0 /* privA, privB, privC */ \
1596  }
1597 
1598 
1600 
1601 
1602 
1603 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1073
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1515
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1327
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1409
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1417
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1201
int32_t providerProperties
Text provider properties.
Definition: utext.h:1376
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1454
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1501
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1430
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1142
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1301
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:1003
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1235
void utext_freeze(UText *ut)
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1269
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:1015
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1490
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1309
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1368
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1537
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1244
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1253
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1477
(public) Function dispatch table for UText.
Definition: utext.h:1213
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:948
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1261
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
Text provider owns the text storage.
Definition: utext.h:962
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1471
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1285
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1441
int32_t reserved3
Definition: utext.h:1235
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs) Intended for use should the table grow to accommodat...
Definition: utext.h:1228
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1531
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1043
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1424
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1400
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1162
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
C API: Unicode Properties.
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:934
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1180
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1525
There is meta data associated with the text.
Definition: utext.h:954
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1483
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1394
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText). Allows possible backward-compatible extension.
Definition: utext.h:1384
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1321
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1508
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
"Smart pointer" class, closes a UText via utext_close().
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1447
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1315
uint32_t magic
(private) Magic.
Definition: utext.h:1360
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1108
UText struct.
Definition: utext.h:1347
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1277
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:941
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1462
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1293
int32_t reserved2
Definition: utext.h:1235
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
C++ API: Replaceable String.