ICU 65.1  65.1
Namespaces | Typedefs | Enumerations | Functions
uscript.h File Reference

C API: Unicode Script Information. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Namespaces

 icu
 File coll.h.
 

Typedefs

typedef enum UScriptCode UScriptCode
 Constants for ISO 15924 script codes. More...
 
typedef enum UScriptUsage UScriptUsage
 Script usage constants. More...
 

Enumerations

enum  UScriptCode {
  USCRIPT_INVALID_CODE = -1, USCRIPT_COMMON = 0, USCRIPT_INHERITED = 1, USCRIPT_ARABIC = 2,
  USCRIPT_ARMENIAN = 3, USCRIPT_BENGALI = 4, USCRIPT_BOPOMOFO = 5, USCRIPT_CHEROKEE = 6,
  USCRIPT_COPTIC = 7, USCRIPT_CYRILLIC = 8, USCRIPT_DESERET = 9, USCRIPT_DEVANAGARI = 10,
  USCRIPT_ETHIOPIC = 11, USCRIPT_GEORGIAN = 12, USCRIPT_GOTHIC = 13, USCRIPT_GREEK = 14,
  USCRIPT_GUJARATI = 15, USCRIPT_GURMUKHI = 16, USCRIPT_HAN = 17, USCRIPT_HANGUL = 18,
  USCRIPT_HEBREW = 19, USCRIPT_HIRAGANA = 20, USCRIPT_KANNADA = 21, USCRIPT_KATAKANA = 22,
  USCRIPT_KHMER = 23, USCRIPT_LAO = 24, USCRIPT_LATIN = 25, USCRIPT_MALAYALAM = 26,
  USCRIPT_MONGOLIAN = 27, USCRIPT_MYANMAR = 28, USCRIPT_OGHAM = 29, USCRIPT_OLD_ITALIC = 30,
  USCRIPT_ORIYA = 31, USCRIPT_RUNIC = 32, USCRIPT_SINHALA = 33, USCRIPT_SYRIAC = 34,
  USCRIPT_TAMIL = 35, USCRIPT_TELUGU = 36, USCRIPT_THAANA = 37, USCRIPT_THAI = 38,
  USCRIPT_TIBETAN = 39, USCRIPT_CANADIAN_ABORIGINAL = 40, USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, USCRIPT_YI = 41,
  USCRIPT_TAGALOG = 42, USCRIPT_HANUNOO = 43, USCRIPT_BUHID = 44, USCRIPT_TAGBANWA = 45,
  USCRIPT_BRAILLE = 46, USCRIPT_CYPRIOT = 47, USCRIPT_LIMBU = 48, USCRIPT_LINEAR_B = 49,
  USCRIPT_OSMANYA = 50, USCRIPT_SHAVIAN = 51, USCRIPT_TAI_LE = 52, USCRIPT_UGARITIC = 53,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54, USCRIPT_BUGINESE = 55, USCRIPT_GLAGOLITIC = 56, USCRIPT_KHAROSHTHI = 57,
  USCRIPT_SYLOTI_NAGRI = 58, USCRIPT_NEW_TAI_LUE = 59, USCRIPT_TIFINAGH = 60, USCRIPT_OLD_PERSIAN = 61,
  USCRIPT_BALINESE = 62, USCRIPT_BATAK = 63, USCRIPT_BLISSYMBOLS = 64, USCRIPT_BRAHMI = 65,
  USCRIPT_CHAM = 66, USCRIPT_CIRTH = 67, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, USCRIPT_DEMOTIC_EGYPTIAN = 69,
  USCRIPT_HIERATIC_EGYPTIAN = 70, USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, USCRIPT_KHUTSURI = 72, USCRIPT_SIMPLIFIED_HAN = 73,
  USCRIPT_TRADITIONAL_HAN = 74, USCRIPT_PAHAWH_HMONG = 75, USCRIPT_OLD_HUNGARIAN = 76, USCRIPT_HARAPPAN_INDUS = 77,
  USCRIPT_JAVANESE = 78, USCRIPT_KAYAH_LI = 79, USCRIPT_LATIN_FRAKTUR = 80, USCRIPT_LATIN_GAELIC = 81,
  USCRIPT_LEPCHA = 82, USCRIPT_LINEAR_A = 83, USCRIPT_MANDAIC = 84, USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  USCRIPT_MAYAN_HIEROGLYPHS = 85, USCRIPT_MEROITIC_HIEROGLYPHS = 86, USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, USCRIPT_NKO = 87,
  USCRIPT_ORKHON = 88, USCRIPT_OLD_PERMIC = 89, USCRIPT_PHAGS_PA = 90, USCRIPT_PHOENICIAN = 91,
  USCRIPT_MIAO = 92, USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, USCRIPT_RONGORONGO = 93, USCRIPT_SARATI = 94,
  USCRIPT_ESTRANGELO_SYRIAC = 95, USCRIPT_WESTERN_SYRIAC = 96, USCRIPT_EASTERN_SYRIAC = 97, USCRIPT_TENGWAR = 98,
  USCRIPT_VAI = 99, USCRIPT_VISIBLE_SPEECH = 100, USCRIPT_CUNEIFORM = 101, USCRIPT_UNWRITTEN_LANGUAGES = 102,
  USCRIPT_UNKNOWN = 103, USCRIPT_CARIAN = 104, USCRIPT_JAPANESE = 105, USCRIPT_LANNA = 106,
  USCRIPT_LYCIAN = 107, USCRIPT_LYDIAN = 108, USCRIPT_OL_CHIKI = 109, USCRIPT_REJANG = 110,
  USCRIPT_SAURASHTRA = 111, USCRIPT_SIGN_WRITING = 112, USCRIPT_SUNDANESE = 113, USCRIPT_MOON = 114,
  USCRIPT_MEITEI_MAYEK = 115, USCRIPT_IMPERIAL_ARAMAIC = 116, USCRIPT_AVESTAN = 117, USCRIPT_CHAKMA = 118,
  USCRIPT_KOREAN = 119, USCRIPT_KAITHI = 120, USCRIPT_MANICHAEAN = 121, USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,
  USCRIPT_PSALTER_PAHLAVI = 123, USCRIPT_BOOK_PAHLAVI = 124, USCRIPT_INSCRIPTIONAL_PARTHIAN = 125, USCRIPT_SAMARITAN = 126,
  USCRIPT_TAI_VIET = 127, USCRIPT_MATHEMATICAL_NOTATION = 128, USCRIPT_SYMBOLS = 129, USCRIPT_BAMUM = 130,
  USCRIPT_LISU = 131, USCRIPT_NAKHI_GEBA = 132, USCRIPT_OLD_SOUTH_ARABIAN = 133, USCRIPT_BASSA_VAH = 134,
  USCRIPT_DUPLOYAN = 135, USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, USCRIPT_ELBASAN = 136, USCRIPT_GRANTHA = 137,
  USCRIPT_KPELLE = 138, USCRIPT_LOMA = 139, USCRIPT_MENDE = 140, USCRIPT_MEROITIC_CURSIVE = 141,
  USCRIPT_OLD_NORTH_ARABIAN = 142, USCRIPT_NABATAEAN = 143, USCRIPT_PALMYRENE = 144, USCRIPT_KHUDAWADI = 145,
  USCRIPT_SINDHI = USCRIPT_KHUDAWADI, USCRIPT_WARANG_CITI = 146, USCRIPT_AFAKA = 147, USCRIPT_JURCHEN = 148,
  USCRIPT_MRO = 149, USCRIPT_NUSHU = 150, USCRIPT_SHARADA = 151, USCRIPT_SORA_SOMPENG = 152,
  USCRIPT_TAKRI = 153, USCRIPT_TANGUT = 154, USCRIPT_WOLEAI = 155, USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,
  USCRIPT_KHOJKI = 157, USCRIPT_TIRHUTA = 158, USCRIPT_CAUCASIAN_ALBANIAN = 159, USCRIPT_MAHAJANI = 160,
  USCRIPT_AHOM = 161, USCRIPT_HATRAN = 162, USCRIPT_MODI = 163, USCRIPT_MULTANI = 164,
  USCRIPT_PAU_CIN_HAU = 165, USCRIPT_SIDDHAM = 166, USCRIPT_ADLAM = 167, USCRIPT_BHAIKSUKI = 168,
  USCRIPT_MARCHEN = 169, USCRIPT_NEWA = 170, USCRIPT_OSAGE = 171, USCRIPT_HAN_WITH_BOPOMOFO = 172,
  USCRIPT_JAMO = 173, USCRIPT_SYMBOLS_EMOJI = 174, USCRIPT_MASARAM_GONDI = 175, USCRIPT_SOYOMBO = 176,
  USCRIPT_ZANABAZAR_SQUARE = 177, USCRIPT_DOGRA = 178, USCRIPT_GUNJALA_GONDI = 179, USCRIPT_MAKASAR = 180,
  USCRIPT_MEDEFAIDRIN = 181, USCRIPT_HANIFI_ROHINGYA = 182, USCRIPT_SOGDIAN = 183, USCRIPT_OLD_SOGDIAN = 184,
  USCRIPT_ELYMAIC = 185, USCRIPT_NYIAKENG_PUACHUE_HMONG = 186, USCRIPT_NANDINAGARI = 187, USCRIPT_WANCHO = 188,
  USCRIPT_CODE_LIMIT = 189
}
 Constants for ISO 15924 script codes. More...
 
enum  UScriptUsage {
  USCRIPT_USAGE_NOT_ENCODED, USCRIPT_USAGE_UNKNOWN, USCRIPT_USAGE_EXCLUDED, USCRIPT_USAGE_LIMITED_USE,
  USCRIPT_USAGE_ASPIRATIONAL, USCRIPT_USAGE_RECOMMENDED
}
 Script usage constants. More...
 

Functions

int32_t uscript_getCode (const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
 Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. More...
 
const char * uscript_getName (UScriptCode scriptCode)
 Returns the long Unicode script name, if there is one. More...
 
const char * uscript_getShortName (UScriptCode scriptCode)
 Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script. More...
 
UScriptCode uscript_getScript (UChar32 codepoint, UErrorCode *err)
 Gets the script code associated with the given codepoint. More...
 
UBool uscript_hasScript (UChar32 c, UScriptCode sc)
 Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc. More...
 
int32_t uscript_getScriptExtensions (UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
 Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes. More...
 
int32_t uscript_getSampleString (UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
 Writes the script sample character string. More...
 
U_COMMON_API icu::UnicodeString uscript_getSampleUnicodeString (UScriptCode script)
 Returns the script sample character string. More...
 
UScriptUsage uscript_getUsage (UScriptCode script)
 Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. More...
 
UBool uscript_isRightToLeft (UScriptCode script)
 Returns TRUE if the script is written right-to-left. More...
 
UBool uscript_breaksBetweenLetters (UScriptCode script)
 Returns TRUE if the script allows line breaks between letters (excluding hyphenation). More...
 
UBool uscript_isCased (UScriptCode script)
 Returns TRUE if, in modern (or most recent) usage of the script, case distinctions are customary. More...
 

Detailed Description

C API: Unicode Script Information.

Definition in file uscript.h.

Typedef Documentation

◆ UScriptCode

typedef enum UScriptCode UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Stable:
ICU 2.2

◆ UScriptUsage

typedef enum UScriptUsage UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Stable:
ICU 51

Enumeration Type Documentation

◆ UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Stable:
ICU 2.2
Enumerator
USCRIPT_INVALID_CODE 
Stable:
ICU 2.2
USCRIPT_COMMON 
Stable:
ICU 2.2
USCRIPT_INHERITED 
Stable:
ICU 2.2
USCRIPT_ARABIC 
Stable:
ICU 2.2
USCRIPT_ARMENIAN 
Stable:
ICU 2.2
USCRIPT_BENGALI 
Stable:
ICU 2.2
USCRIPT_BOPOMOFO 
Stable:
ICU 2.2
USCRIPT_CHEROKEE 
Stable:
ICU 2.2
USCRIPT_COPTIC 
Stable:
ICU 2.2
USCRIPT_CYRILLIC 
Stable:
ICU 2.2
USCRIPT_DESERET 
Stable:
ICU 2.2
USCRIPT_DEVANAGARI 
Stable:
ICU 2.2
USCRIPT_ETHIOPIC 
Stable:
ICU 2.2
USCRIPT_GEORGIAN 
Stable:
ICU 2.2
USCRIPT_GOTHIC 
Stable:
ICU 2.2
USCRIPT_GREEK 
Stable:
ICU 2.2
USCRIPT_GUJARATI 
Stable:
ICU 2.2
USCRIPT_GURMUKHI 
Stable:
ICU 2.2
USCRIPT_HAN 
Stable:
ICU 2.2
USCRIPT_HANGUL 
Stable:
ICU 2.2
USCRIPT_HEBREW 
Stable:
ICU 2.2
USCRIPT_HIRAGANA 
Stable:
ICU 2.2
USCRIPT_KANNADA 
Stable:
ICU 2.2
USCRIPT_KATAKANA 
Stable:
ICU 2.2
USCRIPT_KHMER 
Stable:
ICU 2.2
USCRIPT_LAO 
Stable:
ICU 2.2
USCRIPT_LATIN 
Stable:
ICU 2.2
USCRIPT_MALAYALAM 
Stable:
ICU 2.2
USCRIPT_MONGOLIAN 
Stable:
ICU 2.2
USCRIPT_MYANMAR 
Stable:
ICU 2.2
USCRIPT_OGHAM 
Stable:
ICU 2.2
USCRIPT_OLD_ITALIC 
Stable:
ICU 2.2
USCRIPT_ORIYA 
Stable:
ICU 2.2
USCRIPT_RUNIC 
Stable:
ICU 2.2
USCRIPT_SINHALA 
Stable:
ICU 2.2
USCRIPT_SYRIAC 
Stable:
ICU 2.2
USCRIPT_TAMIL 
Stable:
ICU 2.2
USCRIPT_TELUGU 
Stable:
ICU 2.2
USCRIPT_THAANA 
Stable:
ICU 2.2
USCRIPT_THAI 
Stable:
ICU 2.2
USCRIPT_TIBETAN 
Stable:
ICU 2.2
USCRIPT_CANADIAN_ABORIGINAL 

Canadian_Aboriginal script.

Stable:
ICU 2.6
USCRIPT_UCAS 

Canadian_Aboriginal script (alias).

Stable:
ICU 2.2
USCRIPT_YI 
Stable:
ICU 2.2
USCRIPT_TAGALOG 
Stable:
ICU 2.2
USCRIPT_HANUNOO 
Stable:
ICU 2.2
USCRIPT_BUHID 
Stable:
ICU 2.2
USCRIPT_TAGBANWA 
Stable:
ICU 2.2
USCRIPT_BRAILLE 
Stable:
ICU 2.6
USCRIPT_CYPRIOT 
Stable:
ICU 2.6
USCRIPT_LIMBU 
Stable:
ICU 2.6
USCRIPT_LINEAR_B 
Stable:
ICU 2.6
USCRIPT_OSMANYA 
Stable:
ICU 2.6
USCRIPT_SHAVIAN 
Stable:
ICU 2.6
USCRIPT_TAI_LE 
Stable:
ICU 2.6
USCRIPT_UGARITIC 
Stable:
ICU 2.6
USCRIPT_KATAKANA_OR_HIRAGANA 

New script code in Unicode 4.0.1.

Stable:
ICU 3.0
USCRIPT_BUGINESE 
Stable:
ICU 3.4
USCRIPT_GLAGOLITIC 
Stable:
ICU 3.4
USCRIPT_KHAROSHTHI 
Stable:
ICU 3.4
USCRIPT_SYLOTI_NAGRI 
Stable:
ICU 3.4
USCRIPT_NEW_TAI_LUE 
Stable:
ICU 3.4
USCRIPT_TIFINAGH 
Stable:
ICU 3.4
USCRIPT_OLD_PERSIAN 
Stable:
ICU 3.4
USCRIPT_BALINESE 
Stable:
ICU 3.6
USCRIPT_BATAK 
Stable:
ICU 3.6
USCRIPT_BLISSYMBOLS 
Stable:
ICU 3.6
USCRIPT_BRAHMI 
Stable:
ICU 3.6
USCRIPT_CHAM 
Stable:
ICU 3.6
USCRIPT_CIRTH 
Stable:
ICU 3.6
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC 
Stable:
ICU 3.6
USCRIPT_DEMOTIC_EGYPTIAN 
Stable:
ICU 3.6
USCRIPT_HIERATIC_EGYPTIAN 
Stable:
ICU 3.6
USCRIPT_EGYPTIAN_HIEROGLYPHS 
Stable:
ICU 3.6
USCRIPT_KHUTSURI 
Stable:
ICU 3.6
USCRIPT_SIMPLIFIED_HAN 
Stable:
ICU 3.6
USCRIPT_TRADITIONAL_HAN 
Stable:
ICU 3.6
USCRIPT_PAHAWH_HMONG 
Stable:
ICU 3.6
USCRIPT_OLD_HUNGARIAN 
Stable:
ICU 3.6
USCRIPT_HARAPPAN_INDUS 
Stable:
ICU 3.6
USCRIPT_JAVANESE 
Stable:
ICU 3.6
USCRIPT_KAYAH_LI 
Stable:
ICU 3.6
USCRIPT_LATIN_FRAKTUR 
Stable:
ICU 3.6
USCRIPT_LATIN_GAELIC 
Stable:
ICU 3.6
USCRIPT_LEPCHA 
Stable:
ICU 3.6
USCRIPT_LINEAR_A 
Stable:
ICU 3.6
USCRIPT_MANDAIC 
Stable:
ICU 4.6
USCRIPT_MANDAEAN 
Stable:
ICU 3.6
USCRIPT_MAYAN_HIEROGLYPHS 
Stable:
ICU 3.6
USCRIPT_MEROITIC_HIEROGLYPHS 
Stable:
ICU 4.6
USCRIPT_MEROITIC 
Stable:
ICU 3.6
USCRIPT_NKO 
Stable:
ICU 3.6
USCRIPT_ORKHON 
Stable:
ICU 3.6
USCRIPT_OLD_PERMIC 
Stable:
ICU 3.6
USCRIPT_PHAGS_PA 
Stable:
ICU 3.6
USCRIPT_PHOENICIAN 
Stable:
ICU 3.6
USCRIPT_MIAO 
Stable:
ICU 52
USCRIPT_PHONETIC_POLLARD 
Stable:
ICU 3.6
USCRIPT_RONGORONGO 
Stable:
ICU 3.6
USCRIPT_SARATI 
Stable:
ICU 3.6
USCRIPT_ESTRANGELO_SYRIAC 
Stable:
ICU 3.6
USCRIPT_WESTERN_SYRIAC 
Stable:
ICU 3.6
USCRIPT_EASTERN_SYRIAC 
Stable:
ICU 3.6
USCRIPT_TENGWAR 
Stable:
ICU 3.6
USCRIPT_VAI 
Stable:
ICU 3.6
USCRIPT_VISIBLE_SPEECH 
Stable:
ICU 3.6
USCRIPT_CUNEIFORM 
Stable:
ICU 3.6
USCRIPT_UNWRITTEN_LANGUAGES 
Stable:
ICU 3.6
USCRIPT_UNKNOWN 
Stable:
ICU 3.6
USCRIPT_CARIAN 
Stable:
ICU 3.8
USCRIPT_JAPANESE 
Stable:
ICU 3.8
USCRIPT_LANNA 
Stable:
ICU 3.8
USCRIPT_LYCIAN 
Stable:
ICU 3.8
USCRIPT_LYDIAN 
Stable:
ICU 3.8
USCRIPT_OL_CHIKI 
Stable:
ICU 3.8
USCRIPT_REJANG 
Stable:
ICU 3.8
USCRIPT_SAURASHTRA 
Stable:
ICU 3.8
USCRIPT_SIGN_WRITING 

Sutton SignWriting.

Stable:
ICU 3.8
USCRIPT_SUNDANESE 
Stable:
ICU 3.8
USCRIPT_MOON 
Stable:
ICU 3.8
USCRIPT_MEITEI_MAYEK 
Stable:
ICU 3.8
USCRIPT_IMPERIAL_ARAMAIC 
Stable:
ICU 4.0
USCRIPT_AVESTAN 
Stable:
ICU 4.0
USCRIPT_CHAKMA 
Stable:
ICU 4.0
USCRIPT_KOREAN 
Stable:
ICU 4.0
USCRIPT_KAITHI 
Stable:
ICU 4.0
USCRIPT_MANICHAEAN 
Stable:
ICU 4.0
USCRIPT_INSCRIPTIONAL_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_PSALTER_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_BOOK_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_INSCRIPTIONAL_PARTHIAN 
Stable:
ICU 4.0
USCRIPT_SAMARITAN 
Stable:
ICU 4.0
USCRIPT_TAI_VIET 
Stable:
ICU 4.0
USCRIPT_MATHEMATICAL_NOTATION 
Stable:
ICU 4.0
USCRIPT_SYMBOLS 
Stable:
ICU 4.0
USCRIPT_BAMUM 
Stable:
ICU 4.4
USCRIPT_LISU 
Stable:
ICU 4.4
USCRIPT_NAKHI_GEBA 
Stable:
ICU 4.4
USCRIPT_OLD_SOUTH_ARABIAN 
Stable:
ICU 4.4
USCRIPT_BASSA_VAH 
Stable:
ICU 4.6
USCRIPT_DUPLOYAN 
Stable:
ICU 54
USCRIPT_DUPLOYAN_SHORTAND 
Deprecated:
ICU 54 Typo, use USCRIPT_DUPLOYAN
USCRIPT_ELBASAN 
Stable:
ICU 4.6
USCRIPT_GRANTHA 
Stable:
ICU 4.6
USCRIPT_KPELLE 
Stable:
ICU 4.6
USCRIPT_LOMA 
Stable:
ICU 4.6
USCRIPT_MENDE 

Mende Kikakui.

Stable:
ICU 4.6
USCRIPT_MEROITIC_CURSIVE 
Stable:
ICU 4.6
USCRIPT_OLD_NORTH_ARABIAN 
Stable:
ICU 4.6
USCRIPT_NABATAEAN 
Stable:
ICU 4.6
USCRIPT_PALMYRENE 
Stable:
ICU 4.6
USCRIPT_KHUDAWADI 
Stable:
ICU 54
USCRIPT_SINDHI 
Stable:
ICU 4.6
USCRIPT_WARANG_CITI 
Stable:
ICU 4.6
USCRIPT_AFAKA 
Stable:
ICU 4.8
USCRIPT_JURCHEN 
Stable:
ICU 4.8
USCRIPT_MRO 
Stable:
ICU 4.8
USCRIPT_NUSHU 
Stable:
ICU 4.8
USCRIPT_SHARADA 
Stable:
ICU 4.8
USCRIPT_SORA_SOMPENG 
Stable:
ICU 4.8
USCRIPT_TAKRI 
Stable:
ICU 4.8
USCRIPT_TANGUT 
Stable:
ICU 4.8
USCRIPT_WOLEAI 
Stable:
ICU 4.8
USCRIPT_ANATOLIAN_HIEROGLYPHS 
Stable:
ICU 49
USCRIPT_KHOJKI 
Stable:
ICU 49
USCRIPT_TIRHUTA 
Stable:
ICU 49
USCRIPT_CAUCASIAN_ALBANIAN 
Stable:
ICU 52
USCRIPT_MAHAJANI 
Stable:
ICU 52
USCRIPT_AHOM 
Stable:
ICU 54
USCRIPT_HATRAN 
Stable:
ICU 54
USCRIPT_MODI 
Stable:
ICU 54
USCRIPT_MULTANI 
Stable:
ICU 54
USCRIPT_PAU_CIN_HAU 
Stable:
ICU 54
USCRIPT_SIDDHAM 
Stable:
ICU 54
USCRIPT_ADLAM 
Stable:
ICU 58
USCRIPT_BHAIKSUKI 
Stable:
ICU 58
USCRIPT_MARCHEN 
Stable:
ICU 58
USCRIPT_NEWA 
Stable:
ICU 58
USCRIPT_OSAGE 
Stable:
ICU 58
USCRIPT_HAN_WITH_BOPOMOFO 
Stable:
ICU 58
USCRIPT_JAMO 
Stable:
ICU 58
USCRIPT_SYMBOLS_EMOJI 
Stable:
ICU 58
USCRIPT_MASARAM_GONDI 
Stable:
ICU 60
USCRIPT_SOYOMBO 
Stable:
ICU 60
USCRIPT_ZANABAZAR_SQUARE 
Stable:
ICU 60
USCRIPT_DOGRA 
Stable:
ICU 62
USCRIPT_GUNJALA_GONDI 
Stable:
ICU 62
USCRIPT_MAKASAR 
Stable:
ICU 62
USCRIPT_MEDEFAIDRIN 
Stable:
ICU 62
USCRIPT_HANIFI_ROHINGYA 
Stable:
ICU 62
USCRIPT_SOGDIAN 
Stable:
ICU 62
USCRIPT_OLD_SOGDIAN 
Stable:
ICU 62
USCRIPT_ELYMAIC 
Stable:
ICU 64
USCRIPT_NYIAKENG_PUACHUE_HMONG 
Stable:
ICU 64
USCRIPT_NANDINAGARI 
Stable:
ICU 64
USCRIPT_WANCHO 
Stable:
ICU 64
USCRIPT_CODE_LIMIT 

One more than the highest normal UScriptCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).

Deprecated:
ICU 58 The numeric value may change over time, see ICU ticket #12420.

Definition at line 54 of file uscript.h.

◆ UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Stable:
ICU 51
Enumerator
USCRIPT_USAGE_NOT_ENCODED 

Not encoded in Unicode.

Stable:
ICU 51
USCRIPT_USAGE_UNKNOWN 

Unknown script usage.

Stable:
ICU 51
USCRIPT_USAGE_EXCLUDED 

Candidate for Exclusion from Identifiers.

Stable:
ICU 51
USCRIPT_USAGE_LIMITED_USE 

Limited Use script.

Stable:
ICU 51
USCRIPT_USAGE_ASPIRATIONAL 

Aspirational Use script.

Stable:
ICU 51
USCRIPT_USAGE_RECOMMENDED 

Recommended script.

Stable:
ICU 51

Definition at line 603 of file uscript.h.

Function Documentation

◆ uscript_breaksBetweenLetters()

UBool uscript_breaksBetweenLetters ( UScriptCode  script)

Returns TRUE if the script allows line breaks between letters (excluding hyphenation).

Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.

Parameters
script  script code
Returns
TRUE if the script allows line breaks between letters
Stable:
ICU 51

◆ uscript_getCode()

int32_t uscript_getCode ( const char *  nameOrAbbrOrLocale,
UScriptCode *  fillIn,
int32_t  capacity,
UErrorCode *  err 
)

Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does a fast lookup with no access of the locale data.

Parameters
nameOrAbbrOrLocale  name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
fillIn  the UScriptCode buffer to fill in the script code
capacity  the capacity (size) of UScriptCode buffer passed in.
err  the error status code.
Returns
The number of script codes filled in the buffer passed in
Stable:
ICU 2.4

◆ uscript_getName()

const char* uscript_getName ( UScriptCode  scriptCode)

Returns the long Unicode script name, if there is one.

Otherwise returns the 4-letter ISO 15924 script code. Returns "Malayalam" given USCRIPT_MALAYALAM.

Parameters
scriptCode  UScriptCode enum
Returns
long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if scriptCode is invalid
Stable:
ICU 2.4

◆ uscript_getSampleString()

int32_t uscript_getSampleString ( UScriptCode  script,
UChar *  dest,
int32_t  capacity,
UErrorCode *  pErrorCode 
)

Writes the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Parameters
script  script code
dest  output string array
capacity  number of UChars in the dest array
pErrorCode  standard ICU in/out error code, must pass U_SUCCESS() on input
Returns
the string length, even if U_BUFFER_OVERFLOW_ERROR
Stable:
ICU 51

◆ uscript_getSampleUnicodeString()

U_COMMON_API icu::UnicodeString uscript_getSampleUnicodeString ( UScriptCode  script)

Returns the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Parameters
script  script code
Returns
the sample character string
Stable:
ICU 51

◆ uscript_getScript()

UScriptCode uscript_getScript ( UChar32  codepoint,
UErrorCode *  err 
)

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02

Parameters
codepoint  UChar32 codepoint
err  the error status code.
Returns
The UScriptCode, or 0 if codepoint is invalid
Stable:
ICU 2.4

◆ uscript_getScriptExtensions()

int32_t uscript_getScriptExtensions ( UChar32  c,
UScriptCode *  scripts,
int32_t  capacity,
UErrorCode *  errorCode 
)

Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.

  • If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
  • If c does not have Script_Extensions, then the one Script code is written to the output array.
  • If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.

In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)

Parameters
c  code point
scripts  output script code array
capacity  capacity of the scripts array
errorCode  Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
Stable:
ICU 49

◆ uscript_getShortName()

const char* uscript_getShortName ( UScriptCode  scriptCode)

Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.

Returns "Mlym" given USCRIPT_MALAYALAM.

Parameters
scriptCode  UScriptCode enum
Returns
short script name (4-letter code), or NULL if scriptCode is invalid
Stable:
ICU 2.4

◆ uscript_getUsage()

UScriptUsage uscript_getUsage ( UScriptCode  script)

Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.

Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.

Parameters
script  script code
Returns
script usage
See also
UScriptUsage
Stable:
ICU 51

◆ uscript_hasScript()

UBool uscript_hasScript ( UChar32  c,
UScriptCode  sc 
)

Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

Parameters
c  code point
sc  script code
Returns
TRUE if sc is in Script_Extensions(c)
Stable:
ICU 49

◆ uscript_isCased()

UBool uscript_isCased ( UScriptCode  script)

Returns TRUE if, in modern (or most recent) usage of the script, case distinctions are customary.

For example, Latn and Cyrl.

Parameters
script  script code
Returns
TRUE if the script is cased
Stable:
ICU 51

◆ uscript_isRightToLeft()

UBool uscript_isRightToLeft ( UScriptCode  script)

Returns TRUE if the script is written right-to-left.

For example, Arab and Hebr.

Parameters
script  script code
Returns
TRUE if the script is right-to-left
Stable:
ICU 51