ICU 65.1  65.1
unorm2.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unorm2.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009dec15
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UNORM2_H__
20 #define __UNORM2_H__
21 
33 #include "unicode/utypes.h"
34 #include "unicode/localpointer.h"
35 #include "unicode/stringoptions.h"
36 #include "unicode/uset.h"
37 
45 typedef enum {
88 
114 
119 struct UNormalizer2;
120 typedef struct UNormalizer2 UNormalizer2;
122 #if !UCONFIG_NO_NORMALIZATION
123 
135 U_STABLE const UNormalizer2 * U_EXPORT2
136 unorm2_getNFCInstance(UErrorCode *pErrorCode);
137 
149 U_STABLE const UNormalizer2 * U_EXPORT2
150 unorm2_getNFDInstance(UErrorCode *pErrorCode);
151 
163 U_STABLE const UNormalizer2 * U_EXPORT2
164 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
165 
177 U_STABLE const UNormalizer2 * U_EXPORT2
178 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
179 
191 U_STABLE const UNormalizer2 * U_EXPORT2
192 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
193 
215 U_STABLE const UNormalizer2 * U_EXPORT2
216 unorm2_getInstance(const char *packageName,
217  const char *name,
218  UNormalization2Mode mode,
219  UErrorCode *pErrorCode);
220 
236 U_STABLE UNormalizer2 * U_EXPORT2
237 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
238 
245 U_STABLE void U_EXPORT2
246 unorm2_close(UNormalizer2 *norm2);
247 
248 #if U_SHOW_CPLUSPLUS_API
249 
250 U_NAMESPACE_BEGIN
251 
262 
263 U_NAMESPACE_END
264 
265 #endif
266 
283 U_STABLE int32_t U_EXPORT2
284 unorm2_normalize(const UNormalizer2 *norm2,
285  const UChar *src, int32_t length,
286  UChar *dest, int32_t capacity,
287  UErrorCode *pErrorCode);
306 U_STABLE int32_t U_EXPORT2
307 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
308  UChar *first, int32_t firstLength, int32_t firstCapacity,
309  const UChar *second, int32_t secondLength,
310  UErrorCode *pErrorCode);
329 U_STABLE int32_t U_EXPORT2
330 unorm2_append(const UNormalizer2 *norm2,
331  UChar *first, int32_t firstLength, int32_t firstCapacity,
332  const UChar *second, int32_t secondLength,
333  UErrorCode *pErrorCode);
334 
354 U_STABLE int32_t U_EXPORT2
355 unorm2_getDecomposition(const UNormalizer2 *norm2,
356  UChar32 c, UChar *decomposition, int32_t capacity,
357  UErrorCode *pErrorCode);
358 
388 U_STABLE int32_t U_EXPORT2
389 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
390  UChar32 c, UChar *decomposition, int32_t capacity,
391  UErrorCode *pErrorCode);
392 
408 U_STABLE UChar32 U_EXPORT2
409 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
410 
420 U_STABLE uint8_t U_EXPORT2
421 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
422 
439 U_STABLE UBool U_EXPORT2
440 unorm2_isNormalized(const UNormalizer2 *norm2,
441  const UChar *s, int32_t length,
442  UErrorCode *pErrorCode);
443 
461 U_STABLE UNormalizationCheckResult U_EXPORT2
462 unorm2_quickCheck(const UNormalizer2 *norm2,
463  const UChar *s, int32_t length,
464  UErrorCode *pErrorCode);
465 
490 U_STABLE int32_t U_EXPORT2
491 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
492  const UChar *s, int32_t length,
493  UErrorCode *pErrorCode);
494 
504 U_STABLE UBool U_EXPORT2
505 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
506 
516 U_STABLE UBool U_EXPORT2
517 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
518 
527 U_STABLE UBool U_EXPORT2
528 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
529 
596 U_STABLE int32_t U_EXPORT2
597 unorm_compare(const UChar *s1, int32_t length1,
598  const UChar *s2, int32_t length2,
599  uint32_t options,
600  UErrorCode *pErrorCode);
601 
602 #endif /* !UCONFIG_NO_NORMALIZATION */
603 #endif /* __UNORM2_H__ */
const UNormalizer2 * unorm2_getNFDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFD normalization.
"Fast C or D" form.
Definition: unorm2.h:76
The input string is not in the normalization form.
Definition: unorm2.h:99
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
const UNormalizer2 * unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode *pErrorCode)
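Returns a UNormalizer2 instance which uses the specified data file (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) and which composes or decomposes text according to the specified mode.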
int32_t unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the raw decomposition mapping of c.
Map, and reorder canonically.
Definition: unorm2.h:63
C API: Bit set option bit constants for various string and character processing functions.
UBool unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
Tests if the character is normalization-inert.
The input string is in the normalization form.
Definition: unorm2.h:104
void unorm2_close(UNormalizer2 *norm2)
Closes a UNormalizer2 instance from unorm2_openFiltered().
UBool unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
const UNormalizer2 * unorm2_getNFCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFC normalization.
The input string may or may not be in the normalization form.
Definition: unorm2.h:112
Compose only contiguously.
Definition: unorm2.h:86
int32_t unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the decomposition mapping of c.
UNormalizer2 * unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode)
Constructs a filtered normalizer wrapping any UNormalizer2 instance and a filter set.
C API: Unicode Set.
UNormalizationCheckResult unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
int32_t unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
struct UNormalizer2 UNormalizer2
C typedef for struct UNormalizer2.
Definition: unorm2.h:120
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
const UNormalizer2 * unorm2_getNFKDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKD normalization.
UChar32 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
Performs pairwise composition of a & b and returns the composite if there is one.
const UNormalizer2 * unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:45
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
uint8_t unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
Gets the combining class of c.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:47
int32_t unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
UBool unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary after it, regardless of context.
int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Returns the end of the normalized substring of the input string.
Basic definitions for ICU, for both C and C++ APIs.
UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary before it, regardless of context.
int32_t unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
Decomposition followed by composition.
Definition: unorm2.h:54
"Smart pointer" class, closes a UNormalizer2 via unorm2_close().
const UNormalizer2 * unorm2_getNFKCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC normalization.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:94
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261