ICU 65.1
normlzr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************
5  * COPYRIGHT:
6  * Copyright (c) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  ********************************************************************
9  */
10 
11 #ifndef NORMLZR_H
12 #define NORMLZR_H
13 
14 #include "unicode/utypes.h"
15 
16 #if U_SHOW_CPLUSPLUS_API
17 
23 #if !UCONFIG_NO_NORMALIZATION
24 
25 #include "unicode/chariter.h"
26 #include "unicode/normalizer2.h"
27 #include "unicode/unistr.h"
28 #include "unicode/unorm.h"
29 #include "unicode/uobject.h"
30 
31 U_NAMESPACE_BEGIN
136 class U_COMMON_API Normalizer : public UObject {
137 public:
138 #ifndef U_HIDE_DEPRECATED_API
139 
144  enum {
145  DONE=0xffff
146  };
147 
148  // Constructors
149 
160  Normalizer(const UnicodeString& str, UNormalizationMode mode);
161 
173  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
174 
185  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
186 #endif /* U_HIDE_DEPRECATED_API */
187 
193  Normalizer(const Normalizer& copy);
194 
199  virtual ~Normalizer();
200 
201 
202  //-------------------------------------------------------------------------
203  // Static utility methods
204  //-------------------------------------------------------------------------
205 
206 #ifndef U_HIDE_DEPRECATED_API
207 
221  static void U_EXPORT2 normalize(const UnicodeString& source,
222  UNormalizationMode mode, int32_t options,
223  UnicodeString& result,
224  UErrorCode &status);
225 
243  static void U_EXPORT2 compose(const UnicodeString& source,
244  UBool compat, int32_t options,
245  UnicodeString& result,
246  UErrorCode &status);
247 
265  static void U_EXPORT2 decompose(const UnicodeString& source,
266  UBool compat, int32_t options,
267  UnicodeString& result,
268  UErrorCode &status);
269 
290  static inline UNormalizationCheckResult
291  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
292 
306  static UNormalizationCheckResult
307  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
308 
329  static inline UBool
330  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
331 
347  static UBool
348  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
349 
379  static UnicodeString &
380  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
381  UnicodeString &result,
382  UNormalizationMode mode, int32_t options,
383  UErrorCode &errorCode);
384 #endif /* U_HIDE_DEPRECATED_API */
385 
450  static inline int32_t
451  compare(const UnicodeString &s1, const UnicodeString &s2,
452  uint32_t options,
453  UErrorCode &errorCode);
454 
455 #ifndef U_HIDE_DEPRECATED_API
456  //-------------------------------------------------------------------------
457  // Iteration API
458  //-------------------------------------------------------------------------
459 
468  UChar32 current(void);
469 
478  UChar32 first(void);
479 
488  UChar32 last(void);
489 
504  UChar32 next(void);
505 
520  UChar32 previous(void);
521 
531  void setIndexOnly(int32_t index);
532 
538  void reset(void);
539 
554  int32_t getIndex(void) const;
555 
564  int32_t startIndex(void) const;
565 
576  int32_t endIndex(void) const;
577 
586  UBool operator==(const Normalizer& that) const;
587 
596  inline UBool operator!=(const Normalizer& that) const;
597 
604  Normalizer* clone() const;
605 
612  int32_t hashCode(void) const;
613 
614  //-------------------------------------------------------------------------
615  // Property access methods
616  //-------------------------------------------------------------------------
617 
633  void setMode(UNormalizationMode newMode);
634 
645  UNormalizationMode getUMode(void) const;
646 
663  void setOption(int32_t option,
664  UBool value);
665 
676  UBool getOption(int32_t option) const;
677 
686  void setText(const UnicodeString& newText,
687  UErrorCode &status);
688 
697  void setText(const CharacterIterator& newText,
698  UErrorCode &status);
699 
709  void setText(ConstChar16Ptr newText,
710  int32_t length,
711  UErrorCode &status);
718  void getText(UnicodeString& result);
719 
725  static UClassID U_EXPORT2 getStaticClassID();
726 #endif /* U_HIDE_DEPRECATED_API */
727 
733  virtual UClassID getDynamicClassID() const;
734 
735 private:
736  //-------------------------------------------------------------------------
737  // Private functions
738  //-------------------------------------------------------------------------
739 
740  Normalizer(); // default constructor not implemented
741  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
742 
743  // Private utility methods for iteration
744  // For documentation, see the source code
745  UBool nextNormalize();
746  UBool previousNormalize();
747 
748  void init();
749  void clearBuffer(void);
750 
751  //-------------------------------------------------------------------------
752  // Private data
753  //-------------------------------------------------------------------------
754 
755  FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
756  const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
757  UNormalizationMode fUMode; // deprecated
758  int32_t fOptions;
759 
760  // The input text and our position in it
761  CharacterIterator *text;
762 
763  // The normalization buffer is the result of normalization
764  // of the source in [currentIndex..nextIndex[ .
765  int32_t currentIndex, nextIndex;
766 
767  // A buffer for holding intermediate results
768  UnicodeString buffer;
769  int32_t bufferPos;
770 };
771 
772 //-------------------------------------------------------------------------
773 // Inline implementations
774 //-------------------------------------------------------------------------
775 
776 #ifndef U_HIDE_DEPRECATED_API
777 inline UBool
778 Normalizer::operator!= (const Normalizer& other) const
779 { return ! operator==(other); }
780 
781 inline UNormalizationCheckResult
782 Normalizer::quickCheck(const UnicodeString& source,
783  UNormalizationMode mode,
784  UErrorCode &status) {
785  return quickCheck(source, mode, 0, status);
786 }
787 
788 inline UBool
789 Normalizer::isNormalized(const UnicodeString& source,
790  UNormalizationMode mode,
791  UErrorCode &status) {
792  return isNormalized(source, mode, 0, status);
793 }
794 #endif /* U_HIDE_DEPRECATED_API */
795 
796 inline int32_t
797 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
798  uint32_t options,
799  UErrorCode &errorCode) {
800  // all argument checking is done in unorm_compare
801  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
802  toUCharPtr(s2.getBuffer()), s2.length(),
803  options,
804  &errorCode);
805 }
806 
807 U_NAMESPACE_END
808 
809 #endif /* #if !UCONFIG_NO_NORMALIZATION */
810 
811 #endif /* U_SHOW_CPLUSPLUS_API */
812 
813 #endif // NORMLZR_H
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode)
Test if a string is in a given normalization form.
Definition: normlzr.h:789
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status)
Performing quick check on a string, to quickly determine if the string is in a particular normalizati...
Definition: normlzr.h:782
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
static int32_t compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode)
Compare two strings for canonical equivalence.
Definition: normlzr.h:797
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:138
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C++ API: New API for Unicode Normalization.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping ta...
Definition: normalizer2.h:85
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:251
C API: Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:424
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition: char16ptr.h:260
Old Unicode normalization API.
Definition: normlzr.h:136
C++ API: Common ICU base class UObject.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3890
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types...
Definition: char16ptr.h:149
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:300
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
C++ API: Character Iterator.
UBool operator!=(const Normalizer &that) const
Returns FALSE when both iterators refer to the same character in the same input text.
Definition: normlzr.h:778
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:503
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:94
int8_t UBool
The ICU boolean type.
Definition: umachine.h:260