ICU 65.1  65.1
caniter.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 #ifndef CANITER_H
11 #define CANITER_H
12 
13 #include "unicode/utypes.h"
14 
15 #if U_SHOW_CPLUSPLUS_API
16 
17 #if !UCONFIG_NO_NORMALIZATION
18 
19 #include "unicode/uobject.h"
20 #include "unicode/unistr.h"
21 
// Compile-time option: CANITER_SKIP_ZEROES is defined as TRUE here only when
// the build has not already defined it, so a definition made before this
// header is included takes precedence.
// NOTE(review): presumably this is the value passed as the `skipZeros`
// argument of CanonicalIterator::permute() -- confirm in the implementation.
31 #ifndef CANITER_SKIP_ZEROES
32 #define CANITER_SKIP_ZEROES TRUE
33 #endif
34 
35 U_NAMESPACE_BEGIN
36 
37 class Hashtable;
38 class Normalizer2;
39 class Normalizer2Impl;
40 
77 public:
// Constructs an iterator for the string `source`.
// Failures are reported through `status` (UErrorCode is ICU's substitute for
// exceptions) rather than by throwing.
// NOTE(review): presumably does nothing useful if `status` already holds a
// failure code on entry -- confirm in the implementation.
84  CanonicalIterator(const UnicodeString &source, UErrorCode &status);
85 
// Virtual destructor.
// NOTE(review): presumably releases the arrays behind the raw-pointer members
// (`pieces`, `pieces_lengths`, `current`) -- confirm in the implementation.
90  virtual ~CanonicalIterator();
91 
// Returns a UnicodeString by value.
// NOTE(review): presumably a copy of the string currently being iterated (the
// `source` member) -- confirm. Note that the method is not declared const.
97  UnicodeString getSource();
98 
// NOTE(review): presumably rewinds the iteration so that the following call to
// next() starts again from the first result -- confirm in the implementation.
103  void reset();
104 
// Returns the next result of the iteration by value.
// NOTE(review): how exhaustion is signalled (e.g. a bogus/empty string once the
// `done` member is set) is not visible in this header -- confirm before relying
// on it.
112  UnicodeString next();
113 
// Gives the iterator a new string, `newSource`, to work on.
// Failures are reported through `status`.
// NOTE(review): presumably also restarts the iteration for the new string --
// confirm in the implementation.
121  void setSource(const UnicodeString &newSource, UErrorCode &status);
122 
123 #ifndef U_HIDE_INTERNAL_API
124 
// Internal API: this declaration sits inside `#ifndef U_HIDE_INTERNAL_API`, so
// it disappears when that macro is defined.
// Static helper taking the string `source`, a `skipZeros` flag, a caller-supplied
// Hashtable `result`, and `status` for error reporting.
// NOTE(review): presumably stores the permutations of `source` into `result`,
// with `skipZeros` controlling whether characters of combining class zero are
// permuted -- confirm in the implementation.
133  static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
134 #endif /* U_HIDE_INTERNAL_API */
135 
// Static class-ID accessor. UClassID is a `void *` used to identify classes
// without relying on the compiler's RTTI.
// NOTE(review): presumably returns the ID that getDynamicClassID() reports for
// objects of this class -- confirm in the implementation.
141  static UClassID U_EXPORT2 getStaticClassID();
142 
// ICU4C "poor man's RTTI": returns a UClassID for the actual ICU class of this
// object.
148  virtual UClassID getDynamicClassID() const;
149 
150 private:
151  // ===================== PRIVATES ==============================
152  // private default constructor
154 
155 
// Copy constructor, declared in the private section so that client code cannot
// copy-construct a CanonicalIterator.
// NOTE(review): whether a definition exists is not visible in this header.
160  CanonicalIterator(const CanonicalIterator& other);
161 
// Copy assignment, declared in the private section so that client code cannot
// assign one CanonicalIterator to another.
// NOTE(review): whether a definition exists is not visible in this header.
166  CanonicalIterator& operator=(const CanonicalIterator& other);
167 
168  // fields
// NOTE(review): `source` is presumably the string set by the constructor or
// setSource() and returned by getSource() -- confirm.
169  UnicodeString source;
// UBool is ICU's boolean type.
// NOTE(review): presumably set once the iteration is exhausted -- confirm.
170  UBool done;
171 
172  // 2 dimensional array holds the pieces of the string with
173  // their different canonically equivalent representations
// NOTE(review): presumably pieces[i] is the array of alternatives for the i-th
// piece, `pieces_length` is the number of pieces, and pieces_lengths[i] is the
// number of alternatives in pieces[i] -- confirm in the implementation.
174  UnicodeString **pieces;
175  int32_t pieces_length;
176  int32_t *pieces_lengths;
177 
178  // current is used in iterating to combine pieces
// NOTE(review): presumably current[i] is the index of the alternative currently
// selected from pieces[i], and `current_length` is the element count of
// `current` -- confirm in the implementation.
179  int32_t *current;
180  int32_t current_length;
181 
182  // transient fields
// NOTE(review): presumably scratch storage reused while assembling results
// rather than persistent iterator state -- confirm in the implementation.
183  UnicodeString buffer;
184 
// Reference members: they must be bound in a constructor initializer list and
// cannot be reseated afterwards.
// NOTE(review): the names suggest the NFD normalizer and the NFC implementation
// data respectively -- confirm where they are initialized.
185  const Normalizer2 &nfd;
186  const Normalizer2Impl &nfcImpl;
187 
// Finds the strings canonically equivalent to `segment`, which is expected to
// already be in NFD. Failures are reported through `status`.
// NOTE(review): presumably the result is an array whose element count is
// written to `result_len`; who owns and frees that array is not visible in
// this header -- confirm in the implementation.
// The trailing comment on the declaration is a Java-style signature, not live
// code.
188  // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
189  UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
190 
// Variant that takes the segment as a raw char16_t buffer `segment` of length
// `segLen`, plus a caller-supplied Hashtable `fillinResult`; failures are
// reported through `status`.
// NOTE(review): presumably the results are added to `fillinResult`, which is
// also the return value on success -- confirm in the implementation.
// The commented-out lines around the declaration are alternative signatures
// (the first one Java-style), not live code.
191  //Set getEquivalents2(String segment);
192  Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
193  //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
194 
// Takes a code point `comp`, a raw char16_t buffer `segment` of length
// `segLen`, a position `segmentPos` within it, and a caller-supplied Hashtable
// `fillinResult`; failures are reported through `status`.
// NOTE(review): presumably checks whether the decomposition of `comp` can be
// matched inside `segment` starting at `segmentPos`, and collects the
// equivalents of the remainder into `fillinResult` -- confirm in the
// implementation.
// The commented-out lines around the declaration are alternative signatures
// (the first one Java-style), not live code.
200  //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
201  Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
202  //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
203 
// NOTE(review): presumably frees the arrays behind `pieces`, `pieces_lengths`
// and `current` and resets the matching length members -- confirm in the
// implementation.
204  void cleanPieces();
205 
206 };
207 
208 U_NAMESPACE_END
209 
210 #endif /* #if !UCONFIG_NO_NORMALIZATION */
211 
212 #endif /* U_SHOW_CPLUSPLUS_API */
213 
214 #endif
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition: caniter.h:76
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:85
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
C++ API: Common ICU base class UObject.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition: umachine.h:140
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:294
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261