17 #ifndef __UCHARSTRIE_H__ 18 #define __UCHARSTRIE_H__ 28 #if U_SHOW_CPLUSPLUS_API 37 class UCharsTrieBuilder;
70 : ownedArray_(
NULL), uchars_(trieUChars),
71 pos_(uchars_), remainingMatchLength_(-1) {}
86 : ownedArray_(
NULL), uchars_(other.uchars_),
87 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
96 remainingMatchLength_=-1;
100 #ifndef U_HIDE_DRAFT_API 110 return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
111 (uint64_t)(pos_ - uchars_);
129 remainingMatchLength_ =
static_cast<int32_t
>(state >> kState64RemainingShift) - 2;
130 pos_ = uchars_ + (state & kState64PosMask);
150 const char16_t *uchars;
152 int32_t remainingMatchLength;
163 state.uchars=uchars_;
165 state.remainingMatchLength=remainingMatchLength_;
180 if(uchars_==state.uchars && uchars_!=
NULL) {
182 remainingMatchLength_=state.remainingMatchLength;
203 remainingMatchLength_=-1;
204 return nextImpl(uchars_, uchar);
261 const char16_t *pos=pos_;
262 int32_t leadUnit=*pos++;
264 return leadUnit&kValueIsFinal ?
265 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
278 const char16_t *pos=pos_;
280 return pos!=
NULL && findUniqueValue(pos+remainingMatchLength_+1,
FALSE, uniqueValue);
341 UBool hasNext()
const;
371 UBool truncateAndStop() {
377 const char16_t *branchNext(
const char16_t *pos, int32_t length,
UErrorCode &errorCode);
379 const char16_t *uchars_;
380 const char16_t *pos_;
381 const char16_t *initialPos_;
382 int32_t remainingMatchLength_;
383 int32_t initialRemainingMatchLength_;
409 UCharsTrie(char16_t *adoptUChars,
const char16_t *trieUChars)
410 : ownedArray_(adoptUChars), uchars_(trieUChars),
411 pos_(uchars_), remainingMatchLength_(-1) {}
422 static inline int32_t readValue(
const char16_t *pos, int32_t leadUnit) {
424 if(leadUnit<kMinTwoUnitValueLead) {
426 }
else if(leadUnit<kThreeUnitValueLead) {
427 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
429 value=(pos[0]<<16)|pos[1];
433 static inline const char16_t *skipValue(
const char16_t *pos, int32_t leadUnit) {
434 if(leadUnit>=kMinTwoUnitValueLead) {
435 if(leadUnit<kThreeUnitValueLead) {
443 static inline const char16_t *skipValue(
const char16_t *pos) {
444 int32_t leadUnit=*pos++;
445 return skipValue(pos, leadUnit&0x7fff);
448 static inline int32_t readNodeValue(
const char16_t *pos, int32_t leadUnit) {
451 if(leadUnit<kMinTwoUnitNodeValueLead) {
452 value=(leadUnit>>6)-1;
453 }
else if(leadUnit<kThreeUnitNodeValueLead) {
454 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
456 value=(pos[0]<<16)|pos[1];
460 static inline const char16_t *skipNodeValue(
const char16_t *pos, int32_t leadUnit) {
462 if(leadUnit>=kMinTwoUnitNodeValueLead) {
463 if(leadUnit<kThreeUnitNodeValueLead) {
472 static inline const char16_t *jumpByDelta(
const char16_t *pos) {
473 int32_t delta=*pos++;
474 if(delta>=kMinTwoUnitDeltaLead) {
475 if(delta==kThreeUnitDeltaLead) {
476 delta=(pos[0]<<16)|pos[1];
479 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
485 static const char16_t *skipDelta(
const char16_t *pos) {
486 int32_t delta=*pos++;
487 if(delta>=kMinTwoUnitDeltaLead) {
488 if(delta==kThreeUnitDeltaLead) {
502 UStringTrieResult branchNext(
const char16_t *pos, int32_t length, int32_t uchar);
510 static const char16_t *findUniqueValueFromBranch(
const char16_t *pos, int32_t length,
511 UBool haveUniqueValue, int32_t &uniqueValue);
514 static UBool findUniqueValue(
const char16_t *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
518 static void getNextBranchUChars(
const char16_t *pos, int32_t length,
Appendable &out);
563 static const int32_t kMaxBranchLinearSubNodeLength=5;
566 static const int32_t kMinLinearMatch=0x30;
567 static const int32_t kMaxLinearMatchLength=0x10;
572 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
573 static const int32_t kNodeTypeMask=kMinValueLead-1;
576 static const int32_t kValueIsFinal=0x8000;
579 static const int32_t kMaxOneUnitValue=0x3fff;
581 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
582 static const int32_t kThreeUnitValueLead=0x7fff;
584 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
587 static const int32_t kMaxOneUnitNodeValue=0xff;
588 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
589 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
591 static const int32_t kMaxTwoUnitNodeValue=
592 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
595 static const int32_t kMaxOneUnitDelta=0xfbff;
596 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
597 static const int32_t kThreeUnitDeltaLead=0xffff;
599 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
605 static constexpr int32_t kState64RemainingShift = 59;
606 static constexpr uint64_t kState64PosMask = (
UINT64_C(1) << kState64RemainingShift) - 1;
608 char16_t *ownedArray_;
611 const char16_t *uchars_;
616 const char16_t *pos_;
618 int32_t remainingMatchLength_;
625 #endif // __UCHARSTRIE_H__
UCharsTrie & resetToState(const State &state)
Resets this trie to the saved state.
int32_t getValue() const
Returns a matching string's value if called immediately after current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
UCharsTrie(const UCharsTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the char16_t array which will be shared (shallow copy).
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all strings reachable from the current state map to the same value.
const UnicodeString & getString() const
UCharsTrie & reset()
Resets this trie to its initial state.
Iterator for all of the (string, value) pairs in a UCharsTrie.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define NULL
Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
Builder class for UCharsTrie.
#define TRUE
The TRUE value of a UBool.
const UCharsTrie & saveState(State &state) const
Saves the state of this trie.
C++ API: Common ICU base class UObject.
State()
Constructs an empty State.
UCharsTrie & resetToState64(uint64_t state)
Resets this trie to the saved state.
uint64_t getState64() const
Returns the state of this trie as a 64-bit integer.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
UCharsTrie state object, for saving a trie's current state and resetting the trie back to this state later.
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
C API: Helper definitions for dictionary trie APIs.
UCharsTrie(ConstChar16Ptr trieUChars)
Constructs a UCharsTrie reader instance.
Basic definitions for ICU, for both C and C++ APIs.
#define FALSE
The FALSE value of a UBool.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
The input unit(s) continued a matching string and there is a value for the string so far. This value will be returned by getValue().
UStringTrieResult first(int32_t uchar)
Traverses the trie from the initial state for this input char16_t.
#define UINT64_C(c)
Provides a platform independent way to specify an unsigned 64-bit integer constant.
UMemory is the common ICU base class.
Light-weight, non-const reader class for a UCharsTrie.
int8_t UBool
The ICU boolean type.
Base class for objects to which Unicode characters and strings can be appended.