Skip to content

Instantly share code, notes, and snippets.

@o11c
Created March 28, 2017 15:58
Show Gist options
  • Select an option

  • Save o11c/84e8f05a29dfdee3503f6bb0fc6f0f4e to your computer and use it in GitHub Desktop.

Select an option

Save o11c/84e8f05a29dfdee3503f6bb0fc6f0f4e to your computer and use it in GitHub Desktop.
import collections.abc
class ClassDict(collections.abc.MutableMapping):
    ''' Wrapper for cls.__dict__ that supports assignment.

        Lookups, iteration and len() read the class's ``__dict__``;
        item assignment and deletion are routed through setattr() and
        delattr() on the class itself.
    '''

    def __init__(self, cls):
        ''' Wrap ``cls``; ``cls.__dict__`` is captured once as ``self.dct``.
        '''
        self.cls = cls
        self.dct = cls.__dict__

    def __getitem__(self, k):
        ''' Return the attribute stored under ``k``; KeyError if absent.
        '''
        return self.dct[k]

    def __setitem__(self, k, v):
        ''' Set attribute ``k`` of the wrapped class to ``v``.
        '''
        setattr(self.cls, k, v)

    def __delitem__(self, k):
        ''' Delete attribute ``k`` of the wrapped class; KeyError if absent.
        '''
        # delattr() reports a missing name as AttributeError, but the
        # mapping protocol requires KeyError for a missing key.
        try:
            delattr(self.cls, k)
        except AttributeError:
            raise KeyError(k) from None

    def __iter__(self):
        ''' Iterate over the names in the wrapped class's ``__dict__``.
        '''
        return iter(self.dct)

    def __len__(self):
        ''' Return the number of entries in the wrapped class's ``__dict__``.
        '''
        return len(self.dct)
# Contents of PropertyAliases.txt and PropertyValueAliases.txt are
# hard-coded here, then just checked during UCD loading.
#
# Also other internal stuff! This turns out to be really useful.
# (there's no reason a "property" has to apply to a single codepoint)
from . import prop
__all__ = []
def _init_classes():
    ''' Declare every known property by calling the prop.make_* factories.

        Each call receives this module's globals() as ``g`` plus the list of
        aliases of the property; enum-style calls also list the aliases of
        each value.
        NOTE(review): the factories presumably bind the new classes into
        ``g`` -- the later bare references to X_Shaping_Environment and
        Script depend on that; confirm against prop.make_enum.
        NOTE(review): ``min=`` looks like the first Unicode version a
        property applies to, with '9999.0.0' marking the internal-only
        ones -- confirm against prop.
    '''
    g = globals()
    # implementation internals
    prop.make_raw(g, ['X_Schematic_Name', 'X_Schematic_Name'], min='9999.0.0')
    prop.make_enum(g, ['X_Shaping_Environment', 'X_Shaping_Environment'], [
        ('isolate', 'isolate'),
        ('initial', 'initial'),
        ('medial', 'medial'),
        ('final', 'final'),
    ], min='9999.0.0')
    prop.make_set(g, ['X_Shaping_Environment_Set', 'X_Shaping_Environment_Set'], X_Shaping_Environment, min='9999.0.0')
    prop.make_enum(g, ['X_Case_Folding_Status', 'X_Case_Folding_Status'], [
        ('C', 'Common'),
        ('F', 'Full'),
        ('S', 'Simple'),
        ('T', 'Turkic'),
    ], min='9999.0.0')
    # Numeric Properties
    prop.make_int(g, ['cjkAccountingNumeric', 'kAccountingNumeric'], min='9.0.0')
    prop.make_int(g, ['cjkOtherNumeric', 'kOtherNumeric'], min='9.0.0')
    prop.make_int(g, ['cjkPrimaryNumeric', 'kPrimaryNumeric'], min='9.0.0')
    prop.make_rational(g, ['nv', 'Numeric_Value'], min='9.0.0')
    # String Properties
    prop.make_code_seq(g, ['cf', 'Case_Folding'], min='9.0.0')
    prop.make_u_code(g, ['cjkCompatibilityVariant', 'kCompatibilityVariant'], min='9.0.0')
    prop.make_code_seq(g, ['dm', 'Decomposition_Mapping'], min='9.0.0') # stored in file with <dt> ahead
    prop.make_code_seq(g, ['FC_NFKC', 'FC_NFKC_Closure'], min='9.0.0')
    prop.make_code_seq(g, ['lc', 'Lowercase_Mapping'], min='9.0.0')
    prop.make_code_seq(g, ['NFKC_CF', 'NFKC_Casefold'], min='9.0.0')
    prop.make_code(g, ['scf', 'Simple_Case_Folding', 'sfc'], min='9.0.0')
    prop.make_code(g, ['slc', 'Simple_Lowercase_Mapping'], min='9.0.0')
    prop.make_code(g, ['stc', 'Simple_Titlecase_Mapping'], min='9.0.0')
    prop.make_code(g, ['suc', 'Simple_Uppercase_Mapping'], min='9.0.0')
    prop.make_code_seq(g, ['tc', 'Titlecase_Mapping'], min='9.0.0')
    prop.make_code_seq(g, ['uc', 'Uppercase_Mapping'], min='9.0.0')
    # Miscellaneous Properties
    prop.make_code(g, ['bmg', 'Bidi_Mirroring_Glyph'], min='9.0.0')
    prop.make_code(g, ['bpb', 'Bidi_Paired_Bracket'], min='9.0.0')
    prop.make_raw(g, ['cjkIICore', 'kIICore'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_GSource', 'kIRG_GSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_HSource', 'kIRG_HSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_JSource', 'kIRG_JSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_KPSource', 'kIRG_KPSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_KSource', 'kIRG_KSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_MSource', 'kIRG_MSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_TSource', 'kIRG_TSource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_USource', 'kIRG_USource'], min='9.0.0')
    prop.make_raw(g, ['cjkIRG_VSource', 'kIRG_VSource'], min='9.0.0')
    prop.make_raw(g, ['cjkRSUnicode', 'kRSUnicode', 'Unicode_Radical_Stroke', 'URS'], min='9.0.0')
    prop.make_raw(g, ['isc', 'ISO_Comment'], min='9.0.0')
    prop.make_enum(g, ['JSN', 'Jamo_Short_Name'], [
        # This special case is not listed in PropertyValueAliases.txt,
        # but is used in the Jamo file.
        ('', ''),
        ('A', 'A'),
        ('AE', 'AE'),
        ('B', 'B'),
        ('BB', 'BB'),
        ('BS', 'BS'),
        ('C', 'C'),
        ('D', 'D'),
        ('DD', 'DD'),
        ('E', 'E'),
        ('EO', 'EO'),
        ('EU', 'EU'),
        ('G', 'G'),
        ('GG', 'GG'),
        ('GS', 'GS'),
        ('H', 'H'),
        ('I', 'I'),
        ('J', 'J'),
        ('JJ', 'JJ'),
        ('K', 'K'),
        ('L', 'L'),
        ('LB', 'LB'),
        ('LG', 'LG'),
        ('LH', 'LH'),
        ('LM', 'LM'),
        ('LP', 'LP'),
        ('LS', 'LS'),
        ('LT', 'LT'),
        ('M', 'M'),
        ('N', 'N'),
        ('NG', 'NG'),
        ('NH', 'NH'),
        ('NJ', 'NJ'),
        ('O', 'O'),
        ('OE', 'OE'),
        ('P', 'P'),
        ('R', 'R'),
        ('S', 'S'),
        ('SS', 'SS'),
        ('T', 'T'),
        ('U', 'U'),
        ('WA', 'WA'),
        ('WAE', 'WAE'),
        ('WE', 'WE'),
        ('WEO', 'WEO'),
        ('WI', 'WI'),
        ('YA', 'YA'),
        ('YAE', 'YAE'),
        ('YE', 'YE'),
        ('YEO', 'YEO'),
        ('YI', 'YI'),
        ('YO', 'YO'),
        ('YU', 'YU'),
    ], min='9.0.0')
    prop.make_raw(g, ['na', 'Name'], min='9.0.0')
    prop.make_raw(g, ['na1', 'Unicode_1_Name'], min='9.0.0')
    prop.make_raw(g, ['Name_Alias', 'Name_Alias'], min='9.0.0')
    # Moved to be *after* Script
    #prop.make_set(g, ['scx', 'Script_Extensions'], Script, min='9.0.0')
    # Catalog Properties
    prop.make_enum(g, ['age', 'Age'], [
        ('1.1', 'V1_1'),
        ('2.0', 'V2_0'),
        ('2.1', 'V2_1'),
        ('3.0', 'V3_0'),
        ('3.1', 'V3_1'),
        ('3.2', 'V3_2'),
        ('4.0', 'V4_0'),
        ('4.1', 'V4_1'),
        ('5.0', 'V5_0'),
        ('5.1', 'V5_1'),
        ('5.2', 'V5_2'),
        ('6.0', 'V6_0'),
        ('6.1', 'V6_1'),
        ('6.2', 'V6_2'),
        ('6.3', 'V6_3'),
        ('7.0', 'V7_0'),
        ('8.0', 'V8_0'),
        ('9.0', 'V9_0'),
        ('NA', 'Unassigned'),
    ], catalog=True, min='9.0.0')
    prop.make_enum(g, ['blk', 'Block'], [
        ('Adlam', 'Adlam'),
        ('Aegean_Numbers', 'Aegean_Numbers'),
        ('Ahom', 'Ahom'),
        ('Alchemical', 'Alchemical_Symbols'),
        ('Alphabetic_PF', 'Alphabetic_Presentation_Forms'),
        ('Anatolian_Hieroglyphs', 'Anatolian_Hieroglyphs'),
        ('Ancient_Greek_Music', 'Ancient_Greek_Musical_Notation'),
        ('Ancient_Greek_Numbers', 'Ancient_Greek_Numbers'),
        ('Ancient_Symbols', 'Ancient_Symbols'),
        ('Arabic', 'Arabic'),
        ('Arabic_Ext_A', 'Arabic_Extended_A'),
        ('Arabic_Math', 'Arabic_Mathematical_Alphabetic_Symbols'),
        ('Arabic_PF_A', 'Arabic_Presentation_Forms_A', 'Arabic_Presentation_Forms-A'),
        ('Arabic_PF_B', 'Arabic_Presentation_Forms_B'),
        ('Arabic_Sup', 'Arabic_Supplement'),
        ('Armenian', 'Armenian'),
        ('Arrows', 'Arrows'),
        ('ASCII', 'Basic_Latin'),
        ('Avestan', 'Avestan'),
        ('Balinese', 'Balinese'),
        ('Bamum', 'Bamum'),
        ('Bamum_Sup', 'Bamum_Supplement'),
        ('Bassa_Vah', 'Bassa_Vah'),
        ('Batak', 'Batak'),
        ('Bengali', 'Bengali'),
        ('Bhaiksuki', 'Bhaiksuki'),
        ('Block_Elements', 'Block_Elements'),
        ('Bopomofo', 'Bopomofo'),
        ('Bopomofo_Ext', 'Bopomofo_Extended'),
        ('Box_Drawing', 'Box_Drawing'),
        ('Brahmi', 'Brahmi'),
        ('Braille', 'Braille_Patterns'),
        ('Buginese', 'Buginese'),
        ('Buhid', 'Buhid'),
        ('Byzantine_Music', 'Byzantine_Musical_Symbols'),
        ('Carian', 'Carian'),
        ('Caucasian_Albanian', 'Caucasian_Albanian'),
        ('Chakma', 'Chakma'),
        ('Cham', 'Cham'),
        ('Cherokee', 'Cherokee'),
        ('Cherokee_Sup', 'Cherokee_Supplement'),
        ('CJK', 'CJK_Unified_Ideographs'),
        ('CJK_Compat', 'CJK_Compatibility'),
        ('CJK_Compat_Forms', 'CJK_Compatibility_Forms'),
        ('CJK_Compat_Ideographs', 'CJK_Compatibility_Ideographs'),
        ('CJK_Compat_Ideographs_Sup', 'CJK_Compatibility_Ideographs_Supplement'),
        ('CJK_Ext_A', 'CJK_Unified_Ideographs_Extension_A'),
        ('CJK_Ext_B', 'CJK_Unified_Ideographs_Extension_B'),
        ('CJK_Ext_C', 'CJK_Unified_Ideographs_Extension_C'),
        ('CJK_Ext_D', 'CJK_Unified_Ideographs_Extension_D'),
        ('CJK_Ext_E', 'CJK_Unified_Ideographs_Extension_E'),
        ('CJK_Radicals_Sup', 'CJK_Radicals_Supplement'),
        ('CJK_Strokes', 'CJK_Strokes'),
        ('CJK_Symbols', 'CJK_Symbols_And_Punctuation'),
        ('Compat_Jamo', 'Hangul_Compatibility_Jamo'),
        ('Control_Pictures', 'Control_Pictures'),
        ('Coptic', 'Coptic'),
        ('Coptic_Epact_Numbers', 'Coptic_Epact_Numbers'),
        ('Counting_Rod', 'Counting_Rod_Numerals'),
        ('Cuneiform', 'Cuneiform'),
        ('Cuneiform_Numbers', 'Cuneiform_Numbers_And_Punctuation'),
        ('Currency_Symbols', 'Currency_Symbols'),
        ('Cypriot_Syllabary', 'Cypriot_Syllabary'),
        ('Cyrillic', 'Cyrillic'),
        ('Cyrillic_Ext_A', 'Cyrillic_Extended_A'),
        ('Cyrillic_Ext_B', 'Cyrillic_Extended_B'),
        ('Cyrillic_Ext_C', 'Cyrillic_Extended_C'),
        ('Cyrillic_Sup', 'Cyrillic_Supplement', 'Cyrillic_Supplementary'),
        ('Deseret', 'Deseret'),
        ('Devanagari', 'Devanagari'),
        ('Devanagari_Ext', 'Devanagari_Extended'),
        ('Diacriticals', 'Combining_Diacritical_Marks'),
        ('Diacriticals_Ext', 'Combining_Diacritical_Marks_Extended'),
        ('Diacriticals_For_Symbols', 'Combining_Diacritical_Marks_For_Symbols', 'Combining_Marks_For_Symbols'),
        ('Diacriticals_Sup', 'Combining_Diacritical_Marks_Supplement'),
        ('Dingbats', 'Dingbats'),
        ('Domino', 'Domino_Tiles'),
        ('Duployan', 'Duployan'),
        ('Early_Dynastic_Cuneiform', 'Early_Dynastic_Cuneiform'),
        ('Egyptian_Hieroglyphs', 'Egyptian_Hieroglyphs'),
        ('Elbasan', 'Elbasan'),
        ('Emoticons', 'Emoticons'),
        ('Enclosed_Alphanum', 'Enclosed_Alphanumerics'),
        ('Enclosed_Alphanum_Sup', 'Enclosed_Alphanumeric_Supplement'),
        ('Enclosed_CJK', 'Enclosed_CJK_Letters_And_Months'),
        ('Enclosed_Ideographic_Sup', 'Enclosed_Ideographic_Supplement'),
        ('Ethiopic', 'Ethiopic'),
        ('Ethiopic_Ext', 'Ethiopic_Extended'),
        ('Ethiopic_Ext_A', 'Ethiopic_Extended_A'),
        ('Ethiopic_Sup', 'Ethiopic_Supplement'),
        ('Geometric_Shapes', 'Geometric_Shapes'),
        ('Geometric_Shapes_Ext', 'Geometric_Shapes_Extended'),
        ('Georgian', 'Georgian'),
        ('Georgian_Sup', 'Georgian_Supplement'),
        ('Glagolitic', 'Glagolitic'),
        ('Glagolitic_Sup', 'Glagolitic_Supplement'),
        ('Gothic', 'Gothic'),
        ('Grantha', 'Grantha'),
        ('Greek', 'Greek_And_Coptic'),
        ('Greek_Ext', 'Greek_Extended'),
        ('Gujarati', 'Gujarati'),
        ('Gurmukhi', 'Gurmukhi'),
        ('Half_And_Full_Forms', 'Halfwidth_And_Fullwidth_Forms'),
        ('Half_Marks', 'Combining_Half_Marks'),
        ('Hangul', 'Hangul_Syllables'),
        ('Hanunoo', 'Hanunoo'),
        ('Hatran', 'Hatran'),
        ('Hebrew', 'Hebrew'),
        ('High_PU_Surrogates', 'High_Private_Use_Surrogates'),
        ('High_Surrogates', 'High_Surrogates'),
        ('Hiragana', 'Hiragana'),
        ('IDC', 'Ideographic_Description_Characters'),
        ('Ideographic_Symbols', 'Ideographic_Symbols_And_Punctuation'),
        ('Imperial_Aramaic', 'Imperial_Aramaic'),
        ('Indic_Number_Forms', 'Common_Indic_Number_Forms'),
        ('Inscriptional_Pahlavi', 'Inscriptional_Pahlavi'),
        ('Inscriptional_Parthian', 'Inscriptional_Parthian'),
        ('IPA_Ext', 'IPA_Extensions'),
        ('Jamo', 'Hangul_Jamo'),
        ('Jamo_Ext_A', 'Hangul_Jamo_Extended_A'),
        ('Jamo_Ext_B', 'Hangul_Jamo_Extended_B'),
        ('Javanese', 'Javanese'),
        ('Kaithi', 'Kaithi'),
        ('Kana_Sup', 'Kana_Supplement'),
        ('Kanbun', 'Kanbun'),
        ('Kangxi', 'Kangxi_Radicals'),
        ('Kannada', 'Kannada'),
        ('Katakana', 'Katakana'),
        ('Katakana_Ext', 'Katakana_Phonetic_Extensions'),
        ('Kayah_Li', 'Kayah_Li'),
        ('Kharoshthi', 'Kharoshthi'),
        ('Khmer', 'Khmer'),
        ('Khmer_Symbols', 'Khmer_Symbols'),
        ('Khojki', 'Khojki'),
        ('Khudawadi', 'Khudawadi'),
        ('Lao', 'Lao'),
        ('Latin_1_Sup', 'Latin_1_Supplement', 'Latin_1'),
        ('Latin_Ext_A', 'Latin_Extended_A'),
        ('Latin_Ext_Additional', 'Latin_Extended_Additional'),
        ('Latin_Ext_B', 'Latin_Extended_B'),
        ('Latin_Ext_C', 'Latin_Extended_C'),
        ('Latin_Ext_D', 'Latin_Extended_D'),
        ('Latin_Ext_E', 'Latin_Extended_E'),
        ('Lepcha', 'Lepcha'),
        ('Letterlike_Symbols', 'Letterlike_Symbols'),
        ('Limbu', 'Limbu'),
        ('Linear_A', 'Linear_A'),
        ('Linear_B_Ideograms', 'Linear_B_Ideograms'),
        ('Linear_B_Syllabary', 'Linear_B_Syllabary'),
        ('Lisu', 'Lisu'),
        ('Low_Surrogates', 'Low_Surrogates'),
        ('Lycian', 'Lycian'),
        ('Lydian', 'Lydian'),
        ('Mahajani', 'Mahajani'),
        ('Mahjong', 'Mahjong_Tiles'),
        ('Malayalam', 'Malayalam'),
        ('Mandaic', 'Mandaic'),
        ('Manichaean', 'Manichaean'),
        ('Marchen', 'Marchen'),
        ('Math_Alphanum', 'Mathematical_Alphanumeric_Symbols'),
        ('Math_Operators', 'Mathematical_Operators'),
        ('Meetei_Mayek', 'Meetei_Mayek'),
        ('Meetei_Mayek_Ext', 'Meetei_Mayek_Extensions'),
        ('Mende_Kikakui', 'Mende_Kikakui'),
        ('Meroitic_Cursive', 'Meroitic_Cursive'),
        ('Meroitic_Hieroglyphs', 'Meroitic_Hieroglyphs'),
        ('Miao', 'Miao'),
        ('Misc_Arrows', 'Miscellaneous_Symbols_And_Arrows'),
        ('Misc_Math_Symbols_A', 'Miscellaneous_Mathematical_Symbols_A'),
        ('Misc_Math_Symbols_B', 'Miscellaneous_Mathematical_Symbols_B'),
        ('Misc_Pictographs', 'Miscellaneous_Symbols_And_Pictographs'),
        ('Misc_Symbols', 'Miscellaneous_Symbols'),
        ('Misc_Technical', 'Miscellaneous_Technical'),
        ('Modi', 'Modi'),
        ('Modifier_Letters', 'Spacing_Modifier_Letters'),
        ('Modifier_Tone_Letters', 'Modifier_Tone_Letters'),
        ('Mongolian', 'Mongolian'),
        ('Mongolian_Sup', 'Mongolian_Supplement'),
        ('Mro', 'Mro'),
        ('Multani', 'Multani'),
        ('Music', 'Musical_Symbols'),
        ('Myanmar', 'Myanmar'),
        ('Myanmar_Ext_A', 'Myanmar_Extended_A'),
        ('Myanmar_Ext_B', 'Myanmar_Extended_B'),
        ('Nabataean', 'Nabataean'),
        ('NB', 'No_Block'),
        ('New_Tai_Lue', 'New_Tai_Lue'),
        ('Newa', 'Newa'),
        ('NKo', 'NKo'),
        ('Number_Forms', 'Number_Forms'),
        ('OCR', 'Optical_Character_Recognition'),
        ('Ogham', 'Ogham'),
        ('Ol_Chiki', 'Ol_Chiki'),
        ('Old_Hungarian', 'Old_Hungarian'),
        ('Old_Italic', 'Old_Italic'),
        ('Old_North_Arabian', 'Old_North_Arabian'),
        ('Old_Permic', 'Old_Permic'),
        ('Old_Persian', 'Old_Persian'),
        ('Old_South_Arabian', 'Old_South_Arabian'),
        ('Old_Turkic', 'Old_Turkic'),
        ('Oriya', 'Oriya'),
        ('Ornamental_Dingbats', 'Ornamental_Dingbats'),
        ('Osage', 'Osage'),
        ('Osmanya', 'Osmanya'),
        ('Pahawh_Hmong', 'Pahawh_Hmong'),
        ('Palmyrene', 'Palmyrene'),
        ('Pau_Cin_Hau', 'Pau_Cin_Hau'),
        ('Phags_Pa', 'Phags_Pa'),
        ('Phaistos', 'Phaistos_Disc'),
        ('Phoenician', 'Phoenician'),
        ('Phonetic_Ext', 'Phonetic_Extensions'),
        ('Phonetic_Ext_Sup', 'Phonetic_Extensions_Supplement'),
        ('Playing_Cards', 'Playing_Cards'),
        ('Psalter_Pahlavi', 'Psalter_Pahlavi'),
        ('PUA', 'Private_Use_Area', 'Private_Use'),
        ('Punctuation', 'General_Punctuation'),
        ('Rejang', 'Rejang'),
        ('Rumi', 'Rumi_Numeral_Symbols'),
        ('Runic', 'Runic'),
        ('Samaritan', 'Samaritan'),
        ('Saurashtra', 'Saurashtra'),
        ('Sharada', 'Sharada'),
        ('Shavian', 'Shavian'),
        ('Shorthand_Format_Controls', 'Shorthand_Format_Controls'),
        ('Siddham', 'Siddham'),
        ('Sinhala', 'Sinhala'),
        ('Sinhala_Archaic_Numbers', 'Sinhala_Archaic_Numbers'),
        ('Small_Forms', 'Small_Form_Variants'),
        ('Sora_Sompeng', 'Sora_Sompeng'),
        ('Specials', 'Specials'),
        ('Sundanese', 'Sundanese'),
        ('Sundanese_Sup', 'Sundanese_Supplement'),
        ('Sup_Arrows_A', 'Supplemental_Arrows_A'),
        ('Sup_Arrows_B', 'Supplemental_Arrows_B'),
        ('Sup_Arrows_C', 'Supplemental_Arrows_C'),
        ('Sup_Math_Operators', 'Supplemental_Mathematical_Operators'),
        ('Sup_PUA_A', 'Supplementary_Private_Use_Area_A'),
        ('Sup_PUA_B', 'Supplementary_Private_Use_Area_B'),
        ('Sup_Punctuation', 'Supplemental_Punctuation'),
        ('Sup_Symbols_And_Pictographs', 'Supplemental_Symbols_And_Pictographs'),
        ('Super_And_Sub', 'Superscripts_And_Subscripts'),
        ('Sutton_SignWriting', 'Sutton_SignWriting'),
        ('Syloti_Nagri', 'Syloti_Nagri'),
        ('Syriac', 'Syriac'),
        ('Tagalog', 'Tagalog'),
        ('Tagbanwa', 'Tagbanwa'),
        ('Tags', 'Tags'),
        ('Tai_Le', 'Tai_Le'),
        ('Tai_Tham', 'Tai_Tham'),
        ('Tai_Viet', 'Tai_Viet'),
        ('Tai_Xuan_Jing', 'Tai_Xuan_Jing_Symbols'),
        ('Takri', 'Takri'),
        ('Tamil', 'Tamil'),
        ('Tangut', 'Tangut'),
        ('Tangut_Components', 'Tangut_Components'),
        ('Telugu', 'Telugu'),
        ('Thaana', 'Thaana'),
        ('Thai', 'Thai'),
        ('Tibetan', 'Tibetan'),
        ('Tifinagh', 'Tifinagh'),
        ('Tirhuta', 'Tirhuta'),
        ('Transport_And_Map', 'Transport_And_Map_Symbols'),
        ('UCAS', 'Unified_Canadian_Aboriginal_Syllabics', 'Canadian_Syllabics'),
        ('UCAS_Ext', 'Unified_Canadian_Aboriginal_Syllabics_Extended'),
        ('Ugaritic', 'Ugaritic'),
        ('Vai', 'Vai'),
        ('Vedic_Ext', 'Vedic_Extensions'),
        ('Vertical_Forms', 'Vertical_Forms'),
        ('VS', 'Variation_Selectors'),
        ('VS_Sup', 'Variation_Selectors_Supplement'),
        ('Warang_Citi', 'Warang_Citi'),
        ('Yi_Radicals', 'Yi_Radicals'),
        ('Yi_Syllables', 'Yi_Syllables'),
        ('Yijing', 'Yijing_Hexagram_Symbols'),
    ], catalog=True, min='9.0.0')
    prop.make_enum(g, ['sc', 'Script'], [
        ('Adlm', 'Adlam'),
        ('Aghb', 'Caucasian_Albanian'),
        ('Ahom', 'Ahom'),
        ('Arab', 'Arabic'),
        ('Armi', 'Imperial_Aramaic'),
        ('Armn', 'Armenian'),
        ('Avst', 'Avestan'),
        ('Bali', 'Balinese'),
        ('Bamu', 'Bamum'),
        ('Bass', 'Bassa_Vah'),
        ('Batk', 'Batak'),
        ('Beng', 'Bengali'),
        ('Bhks', 'Bhaiksuki'),
        ('Bopo', 'Bopomofo'),
        ('Brah', 'Brahmi'),
        ('Brai', 'Braille'),
        ('Bugi', 'Buginese'),
        ('Buhd', 'Buhid'),
        ('Cakm', 'Chakma'),
        ('Cans', 'Canadian_Aboriginal'),
        ('Cari', 'Carian'),
        ('Cham', 'Cham'),
        ('Cher', 'Cherokee'),
        ('Copt', 'Coptic', 'Qaac'),
        ('Cprt', 'Cypriot'),
        ('Cyrl', 'Cyrillic'),
        ('Deva', 'Devanagari'),
        ('Dsrt', 'Deseret'),
        ('Dupl', 'Duployan'),
        ('Egyp', 'Egyptian_Hieroglyphs'),
        ('Elba', 'Elbasan'),
        ('Ethi', 'Ethiopic'),
        ('Geor', 'Georgian'),
        ('Glag', 'Glagolitic'),
        ('Goth', 'Gothic'),
        ('Gran', 'Grantha'),
        ('Grek', 'Greek'),
        ('Gujr', 'Gujarati'),
        ('Guru', 'Gurmukhi'),
        ('Hang', 'Hangul'),
        ('Hani', 'Han'),
        ('Hano', 'Hanunoo'),
        ('Hatr', 'Hatran'),
        ('Hebr', 'Hebrew'),
        ('Hira', 'Hiragana'),
        ('Hluw', 'Anatolian_Hieroglyphs'),
        ('Hmng', 'Pahawh_Hmong'),
        ('Hrkt', 'Katakana_Or_Hiragana'),
        ('Hung', 'Old_Hungarian'),
        ('Ital', 'Old_Italic'),
        ('Java', 'Javanese'),
        ('Kali', 'Kayah_Li'),
        ('Kana', 'Katakana'),
        ('Khar', 'Kharoshthi'),
        ('Khmr', 'Khmer'),
        ('Khoj', 'Khojki'),
        ('Knda', 'Kannada'),
        ('Kthi', 'Kaithi'),
        ('Lana', 'Tai_Tham'),
        ('Laoo', 'Lao'),
        ('Latn', 'Latin'),
        ('Lepc', 'Lepcha'),
        ('Limb', 'Limbu'),
        ('Lina', 'Linear_A'),
        ('Linb', 'Linear_B'),
        ('Lisu', 'Lisu'),
        ('Lyci', 'Lycian'),
        ('Lydi', 'Lydian'),
        ('Mahj', 'Mahajani'),
        ('Mand', 'Mandaic'),
        ('Mani', 'Manichaean'),
        ('Marc', 'Marchen'),
        ('Mend', 'Mende_Kikakui'),
        ('Merc', 'Meroitic_Cursive'),
        ('Mero', 'Meroitic_Hieroglyphs'),
        ('Mlym', 'Malayalam'),
        ('Modi', 'Modi'),
        ('Mong', 'Mongolian'),
        ('Mroo', 'Mro'),
        ('Mtei', 'Meetei_Mayek'),
        ('Mult', 'Multani'),
        ('Mymr', 'Myanmar'),
        ('Narb', 'Old_North_Arabian'),
        ('Nbat', 'Nabataean'),
        ('Newa', 'Newa'),
        ('Nkoo', 'Nko'),
        ('Ogam', 'Ogham'),
        ('Olck', 'Ol_Chiki'),
        ('Orkh', 'Old_Turkic'),
        ('Orya', 'Oriya'),
        ('Osge', 'Osage'),
        ('Osma', 'Osmanya'),
        ('Palm', 'Palmyrene'),
        ('Pauc', 'Pau_Cin_Hau'),
        ('Perm', 'Old_Permic'),
        ('Phag', 'Phags_Pa'),
        ('Phli', 'Inscriptional_Pahlavi'),
        ('Phlp', 'Psalter_Pahlavi'),
        ('Phnx', 'Phoenician'),
        ('Plrd', 'Miao'),
        ('Prti', 'Inscriptional_Parthian'),
        ('Rjng', 'Rejang'),
        ('Runr', 'Runic'),
        ('Samr', 'Samaritan'),
        ('Sarb', 'Old_South_Arabian'),
        ('Saur', 'Saurashtra'),
        ('Sgnw', 'SignWriting'),
        ('Shaw', 'Shavian'),
        ('Shrd', 'Sharada'),
        ('Sidd', 'Siddham'),
        ('Sind', 'Khudawadi'),
        ('Sinh', 'Sinhala'),
        ('Sora', 'Sora_Sompeng'),
        ('Sund', 'Sundanese'),
        ('Sylo', 'Syloti_Nagri'),
        ('Syrc', 'Syriac'),
        ('Tagb', 'Tagbanwa'),
        ('Takr', 'Takri'),
        ('Tale', 'Tai_Le'),
        ('Talu', 'New_Tai_Lue'),
        ('Taml', 'Tamil'),
        ('Tang', 'Tangut'),
        ('Tavt', 'Tai_Viet'),
        ('Telu', 'Telugu'),
        ('Tfng', 'Tifinagh'),
        ('Tglg', 'Tagalog'),
        ('Thaa', 'Thaana'),
        ('Thai', 'Thai'),
        ('Tibt', 'Tibetan'),
        ('Tirh', 'Tirhuta'),
        ('Ugar', 'Ugaritic'),
        ('Vaii', 'Vai'),
        ('Wara', 'Warang_Citi'),
        ('Xpeo', 'Old_Persian'),
        ('Xsux', 'Cuneiform'),
        ('Yiii', 'Yi'),
        ('Zinh', 'Inherited', 'Qaai'),
        ('Zyyy', 'Common'),
        ('Zzzz', 'Unknown'),
    ], catalog=True, min='9.0.0')
    # Moved to be *after* Script
    # (the bare name Script is only resolvable once the 'sc' call above ran)
    prop.make_set(g, ['scx', 'Script_Extensions'], Script, min='9.0.0')
    # Enumerated Properties
    prop.make_enum(g, ['bc', 'Bidi_Class'], [
        ('AL', 'Arabic_Letter'),
        ('AN', 'Arabic_Number'),
        ('B', 'Paragraph_Separator'),
        ('BN', 'Boundary_Neutral'),
        ('CS', 'Common_Separator'),
        ('EN', 'European_Number'),
        ('ES', 'European_Separator'),
        ('ET', 'European_Terminator'),
        ('FSI', 'First_Strong_Isolate'),
        ('L', 'Left_To_Right'),
        ('LRE', 'Left_To_Right_Embedding'),
        ('LRI', 'Left_To_Right_Isolate'),
        ('LRO', 'Left_To_Right_Override'),
        ('NSM', 'Nonspacing_Mark'),
        ('ON', 'Other_Neutral'),
        ('PDF', 'Pop_Directional_Format'),
        ('PDI', 'Pop_Directional_Isolate'),
        ('R', 'Right_To_Left'),
        ('RLE', 'Right_To_Left_Embedding'),
        ('RLI', 'Right_To_Left_Isolate'),
        ('RLO', 'Right_To_Left_Override'),
        ('S', 'Segment_Separator'),
        ('WS', 'White_Space'),
    ], min='9.0.0')
    prop.make_enum(g, ['bpt', 'Bidi_Paired_Bracket_Type'], [
        ('c', 'Close'),
        ('n', 'None'),
        ('o', 'Open'),
    ], min='9.0.0')
    # Each ccc entry leads with its numeric value, hence manual_index=True.
    # NOTE(review): presumably this shifts the short/long alias positions
    # by one -- confirm against prop.make_enum.
    prop.make_enum(g, ['ccc', 'Canonical_Combining_Class'], [
        ('0', 'NR', 'Not_Reordered'),
        ('1', 'OV', 'Overlay'),
        ('7', 'NK', 'Nukta'),
        ('8', 'KV', 'Kana_Voicing'),
        ('9', 'VR', 'Virama'),
        ('10', 'CCC10', 'CCC10'),
        ('11', 'CCC11', 'CCC11'),
        ('12', 'CCC12', 'CCC12'),
        ('13', 'CCC13', 'CCC13'),
        ('14', 'CCC14', 'CCC14'),
        ('15', 'CCC15', 'CCC15'),
        ('16', 'CCC16', 'CCC16'),
        ('17', 'CCC17', 'CCC17'),
        ('18', 'CCC18', 'CCC18'),
        ('19', 'CCC19', 'CCC19'),
        ('20', 'CCC20', 'CCC20'),
        ('21', 'CCC21', 'CCC21'),
        ('22', 'CCC22', 'CCC22'),
        ('23', 'CCC23', 'CCC23'),
        ('24', 'CCC24', 'CCC24'),
        ('25', 'CCC25', 'CCC25'),
        ('26', 'CCC26', 'CCC26'),
        ('27', 'CCC27', 'CCC27'),
        ('28', 'CCC28', 'CCC28'),
        ('29', 'CCC29', 'CCC29'),
        ('30', 'CCC30', 'CCC30'),
        ('31', 'CCC31', 'CCC31'),
        ('32', 'CCC32', 'CCC32'),
        ('33', 'CCC33', 'CCC33'),
        ('34', 'CCC34', 'CCC34'),
        ('35', 'CCC35', 'CCC35'),
        ('36', 'CCC36', 'CCC36'),
        ('84', 'CCC84', 'CCC84'),
        ('91', 'CCC91', 'CCC91'),
        ('103', 'CCC103', 'CCC103'),
        ('107', 'CCC107', 'CCC107'),
        ('118', 'CCC118', 'CCC118'),
        ('122', 'CCC122', 'CCC122'),
        ('129', 'CCC129', 'CCC129'),
        ('130', 'CCC130', 'CCC130'),
        ('132', 'CCC132', 'CCC132'),
        ('133', 'CCC133', 'CCC133'),
        ('200', 'ATBL', 'Attached_Below_Left'),
        ('202', 'ATB', 'Attached_Below'),
        ('214', 'ATA', 'Attached_Above'),
        ('216', 'ATAR', 'Attached_Above_Right'),
        ('218', 'BL', 'Below_Left'),
        ('220', 'B', 'Below'),
        ('222', 'BR', 'Below_Right'),
        ('224', 'L', 'Left'),
        ('226', 'R', 'Right'),
        ('228', 'AL', 'Above_Left'),
        ('230', 'A', 'Above'),
        ('232', 'AR', 'Above_Right'),
        ('233', 'DB', 'Double_Below'),
        ('234', 'DA', 'Double_Above'),
        ('240', 'IS', 'Iota_Subscript'),
    ], manual_index=True, min='9.0.0')
    prop.make_enum(g, ['dt', 'Decomposition_Type'], [
        ('Can', 'Canonical', 'can'),
        ('Com', 'Compat', 'com'),
        ('Enc', 'Circle', 'enc'),
        ('Fin', 'Final', 'fin'),
        ('Font', 'Font', 'font'),
        ('Fra', 'Fraction', 'fra'),
        ('Init', 'Initial', 'init'),
        ('Iso', 'Isolated', 'iso'),
        ('Med', 'Medial', 'med'),
        ('Nar', 'Narrow', 'nar'),
        ('Nb', 'Nobreak', 'nb'),
        ('None', 'None', 'none'),
        ('Sml', 'Small', 'sml'),
        ('Sqr', 'Square', 'sqr'),
        ('Sub', 'Sub', 'sub'),
        ('Sup', 'Super', 'sup'),
        ('Vert', 'Vertical', 'vert'),
        ('Wide', 'Wide', 'wide'),
    ], min='9.0.0')
    prop.make_enum(g, ['ea', 'East_Asian_Width'], [
        ('A', 'Ambiguous'),
        ('F', 'Fullwidth'),
        ('H', 'Halfwidth'),
        ('N', 'Neutral'),
        ('Na', 'Narrow'),
        ('W', 'Wide'),
    ], min='9.0.0')
    prop.make_enum(g, ['gc', 'General_Category'], [
        ('C', 'Other'),
        ('Cc', 'Control', 'cntrl'),
        ('Cf', 'Format'),
        ('Cn', 'Unassigned'),
        ('Co', 'Private_Use'),
        ('Cs', 'Surrogate'),
        ('L', 'Letter'),
        ('LC', 'Cased_Letter'),
        ('Ll', 'Lowercase_Letter'),
        ('Lm', 'Modifier_Letter'),
        ('Lo', 'Other_Letter'),
        ('Lt', 'Titlecase_Letter'),
        ('Lu', 'Uppercase_Letter'),
        ('M', 'Mark', 'Combining_Mark'),
        ('Mc', 'Spacing_Mark'),
        ('Me', 'Enclosing_Mark'),
        ('Mn', 'Nonspacing_Mark'),
        ('N', 'Number'),
        ('Nd', 'Decimal_Number', 'digit'),
        ('Nl', 'Letter_Number'),
        ('No', 'Other_Number'),
        ('P', 'Punctuation', 'punct'),
        ('Pc', 'Connector_Punctuation'),
        ('Pd', 'Dash_Punctuation'),
        ('Pe', 'Close_Punctuation'),
        ('Pf', 'Final_Punctuation'),
        ('Pi', 'Initial_Punctuation'),
        ('Po', 'Other_Punctuation'),
        ('Ps', 'Open_Punctuation'),
        ('S', 'Symbol'),
        ('Sc', 'Currency_Symbol'),
        ('Sk', 'Modifier_Symbol'),
        ('Sm', 'Math_Symbol'),
        ('So', 'Other_Symbol'),
        ('Z', 'Separator'),
        ('Zl', 'Line_Separator'),
        ('Zp', 'Paragraph_Separator'),
        ('Zs', 'Space_Separator'),
    ], min='9.0.0')
    prop.make_enum(g, ['GCB', 'Grapheme_Cluster_Break'], [
        ('CN', 'Control'),
        ('CR', 'CR'),
        ('EB', 'E_Base'),
        ('EBG', 'E_Base_GAZ'),
        ('EM', 'E_Modifier'),
        ('EX', 'Extend'),
        ('GAZ', 'Glue_After_Zwj'),
        ('L', 'L'),
        ('LF', 'LF'),
        ('LV', 'LV'),
        ('LVT', 'LVT'),
        ('PP', 'Prepend'),
        ('RI', 'Regional_Indicator'),
        ('SM', 'SpacingMark'),
        ('T', 'T'),
        ('V', 'V'),
        ('XX', 'Other'),
        ('ZWJ', 'ZWJ'),
    ], min='9.0.0')
    prop.make_enum(g, ['hst', 'Hangul_Syllable_Type'], [
        ('L', 'Leading_Jamo'),
        ('LV', 'LV_Syllable'),
        ('LVT', 'LVT_Syllable'),
        ('NA', 'Not_Applicable'),
        ('T', 'Trailing_Jamo'),
        ('V', 'Vowel_Jamo'),
    ], min='9.0.0')
    prop.make_enum(g, ['InPC', 'Indic_Positional_Category'], [
        ('Bottom', 'Bottom'),
        ('Bottom_And_Right', 'Bottom_And_Right'),
        ('Left', 'Left'),
        ('Left_And_Right', 'Left_And_Right'),
        ('NA', 'NA'),
        ('Overstruck', 'Overstruck'),
        ('Right', 'Right'),
        ('Top', 'Top'),
        ('Top_And_Bottom', 'Top_And_Bottom'),
        ('Top_And_Bottom_And_Right', 'Top_And_Bottom_And_Right'),
        ('Top_And_Left', 'Top_And_Left'),
        ('Top_And_Left_And_Right', 'Top_And_Left_And_Right'),
        ('Top_And_Right', 'Top_And_Right'),
        ('Visual_Order_Left', 'Visual_Order_Left'),
    ], min='9.0.0')
    prop.make_enum(g, ['InSC', 'Indic_Syllabic_Category'], [
        ('Avagraha', 'Avagraha'),
        ('Bindu', 'Bindu'),
        ('Brahmi_Joining_Number', 'Brahmi_Joining_Number'),
        ('Cantillation_Mark', 'Cantillation_Mark'),
        ('Consonant', 'Consonant'),
        ('Consonant_Dead', 'Consonant_Dead'),
        ('Consonant_Final', 'Consonant_Final'),
        ('Consonant_Head_Letter', 'Consonant_Head_Letter'),
        ('Consonant_Killer', 'Consonant_Killer'),
        ('Consonant_Medial', 'Consonant_Medial'),
        ('Consonant_Placeholder', 'Consonant_Placeholder'),
        ('Consonant_Preceding_Repha', 'Consonant_Preceding_Repha'),
        ('Consonant_Prefixed', 'Consonant_Prefixed'),
        ('Consonant_Subjoined', 'Consonant_Subjoined'),
        ('Consonant_Succeeding_Repha', 'Consonant_Succeeding_Repha'),
        ('Consonant_With_Stacker', 'Consonant_With_Stacker'),
        ('Gemination_Mark', 'Gemination_Mark'),
        ('Invisible_Stacker', 'Invisible_Stacker'),
        ('Joiner', 'Joiner'),
        ('Modifying_Letter', 'Modifying_Letter'),
        ('Non_Joiner', 'Non_Joiner'),
        ('Nukta', 'Nukta'),
        ('Number', 'Number'),
        ('Number_Joiner', 'Number_Joiner'),
        ('Other', 'Other'),
        ('Pure_Killer', 'Pure_Killer'),
        ('Register_Shifter', 'Register_Shifter'),
        ('Syllable_Modifier', 'Syllable_Modifier'),
        ('Tone_Letter', 'Tone_Letter'),
        ('Tone_Mark', 'Tone_Mark'),
        ('Virama', 'Virama'),
        ('Visarga', 'Visarga'),
        ('Vowel', 'Vowel'),
        ('Vowel_Dependent', 'Vowel_Dependent'),
        ('Vowel_Independent', 'Vowel_Independent'),
    ], min='9.0.0')
    prop.make_enum(g, ['jg', 'Joining_Group'], [
        ('African_Feh', 'African_Feh'),
        ('African_Noon', 'African_Noon'),
        ('African_Qaf', 'African_Qaf'),
        ('Ain', 'Ain'),
        ('Alaph', 'Alaph'),
        ('Alef', 'Alef'),
        ('Beh', 'Beh'),
        ('Beth', 'Beth'),
        ('Burushaski_Yeh_Barree', 'Burushaski_Yeh_Barree'),
        ('Dal', 'Dal'),
        ('Dalath_Rish', 'Dalath_Rish'),
        ('E', 'E'),
        ('Farsi_Yeh', 'Farsi_Yeh'),
        ('Fe', 'Fe'),
        ('Feh', 'Feh'),
        ('Final_Semkath', 'Final_Semkath'),
        ('Gaf', 'Gaf'),
        ('Gamal', 'Gamal'),
        ('Hah', 'Hah'),
        ('He', 'He'),
        ('Heh', 'Heh'),
        ('Heh_Goal', 'Heh_Goal'),
        ('Heth', 'Heth'),
        ('Kaf', 'Kaf'),
        ('Kaph', 'Kaph'),
        ('Khaph', 'Khaph'),
        ('Knotted_Heh', 'Knotted_Heh'),
        ('Lam', 'Lam'),
        ('Lamadh', 'Lamadh'),
        ('Manichaean_Aleph', 'Manichaean_Aleph'),
        ('Manichaean_Ayin', 'Manichaean_Ayin'),
        ('Manichaean_Beth', 'Manichaean_Beth'),
        ('Manichaean_Daleth', 'Manichaean_Daleth'),
        ('Manichaean_Dhamedh', 'Manichaean_Dhamedh'),
        ('Manichaean_Five', 'Manichaean_Five'),
        ('Manichaean_Gimel', 'Manichaean_Gimel'),
        ('Manichaean_Heth', 'Manichaean_Heth'),
        ('Manichaean_Hundred', 'Manichaean_Hundred'),
        ('Manichaean_Kaph', 'Manichaean_Kaph'),
        ('Manichaean_Lamedh', 'Manichaean_Lamedh'),
        ('Manichaean_Mem', 'Manichaean_Mem'),
        ('Manichaean_Nun', 'Manichaean_Nun'),
        ('Manichaean_One', 'Manichaean_One'),
        ('Manichaean_Pe', 'Manichaean_Pe'),
        ('Manichaean_Qoph', 'Manichaean_Qoph'),
        ('Manichaean_Resh', 'Manichaean_Resh'),
        ('Manichaean_Sadhe', 'Manichaean_Sadhe'),
        ('Manichaean_Samekh', 'Manichaean_Samekh'),
        ('Manichaean_Taw', 'Manichaean_Taw'),
        ('Manichaean_Ten', 'Manichaean_Ten'),
        ('Manichaean_Teth', 'Manichaean_Teth'),
        ('Manichaean_Thamedh', 'Manichaean_Thamedh'),
        ('Manichaean_Twenty', 'Manichaean_Twenty'),
        ('Manichaean_Waw', 'Manichaean_Waw'),
        ('Manichaean_Yodh', 'Manichaean_Yodh'),
        ('Manichaean_Zayin', 'Manichaean_Zayin'),
        ('Meem', 'Meem'),
        ('Mim', 'Mim'),
        ('No_Joining_Group', 'No_Joining_Group'),
        ('Noon', 'Noon'),
        ('Nun', 'Nun'),
        ('Nya', 'Nya'),
        ('Pe', 'Pe'),
        ('Qaf', 'Qaf'),
        ('Qaph', 'Qaph'),
        ('Reh', 'Reh'),
        ('Reversed_Pe', 'Reversed_Pe'),
        ('Rohingya_Yeh', 'Rohingya_Yeh'),
        ('Sad', 'Sad'),
        ('Sadhe', 'Sadhe'),
        ('Seen', 'Seen'),
        ('Semkath', 'Semkath'),
        ('Shin', 'Shin'),
        ('Straight_Waw', 'Straight_Waw'),
        ('Swash_Kaf', 'Swash_Kaf'),
        ('Syriac_Waw', 'Syriac_Waw'),
        ('Tah', 'Tah'),
        ('Taw', 'Taw'),
        ('Teh_Marbuta', 'Teh_Marbuta'),
        ('Teh_Marbuta_Goal', 'Hamza_On_Heh_Goal'),
        ('Teth', 'Teth'),
        ('Waw', 'Waw'),
        ('Yeh', 'Yeh'),
        ('Yeh_Barree', 'Yeh_Barree'),
        ('Yeh_With_Tail', 'Yeh_With_Tail'),
        ('Yudh', 'Yudh'),
        ('Yudh_He', 'Yudh_He'),
        ('Zain', 'Zain'),
        ('Zhain', 'Zhain'),
    ], min='9.0.0')
    prop.make_enum(g, ['jt', 'Joining_Type'], [
        ('C', 'Join_Causing'),
        ('D', 'Dual_Joining'),
        ('L', 'Left_Joining'),
        ('R', 'Right_Joining'),
        ('T', 'Transparent'),
        ('U', 'Non_Joining'),
    ], min='9.0.0')
    prop.make_enum(g, ['lb', 'Line_Break'], [
        ('AI', 'Ambiguous'),
        ('AL', 'Alphabetic'),
        ('B2', 'Break_Both'),
        ('BA', 'Break_After'),
        ('BB', 'Break_Before'),
        ('BK', 'Mandatory_Break'),
        ('CB', 'Contingent_Break'),
        ('CJ', 'Conditional_Japanese_Starter'),
        ('CL', 'Close_Punctuation'),
        ('CM', 'Combining_Mark'),
        ('CP', 'Close_Parenthesis'),
        ('CR', 'Carriage_Return'),
        ('EB', 'E_Base'),
        ('EM', 'E_Modifier'),
        ('EX', 'Exclamation'),
        ('GL', 'Glue'),
        ('H2', 'H2'),
        ('H3', 'H3'),
        ('HL', 'Hebrew_Letter'),
        ('HY', 'Hyphen'),
        ('ID', 'Ideographic'),
        ('IN', 'Inseparable', 'Inseperable'),
        ('IS', 'Infix_Numeric'),
        ('JL', 'JL'),
        ('JT', 'JT'),
        ('JV', 'JV'),
        ('LF', 'Line_Feed'),
        ('NL', 'Next_Line'),
        ('NS', 'Nonstarter'),
        ('NU', 'Numeric'),
        ('OP', 'Open_Punctuation'),
        ('PO', 'Postfix_Numeric'),
        ('PR', 'Prefix_Numeric'),
        ('QU', 'Quotation'),
        ('RI', 'Regional_Indicator'),
        ('SA', 'Complex_Context'),
        ('SG', 'Surrogate'),
        ('SP', 'Space'),
        ('SY', 'Break_Symbols'),
        ('WJ', 'Word_Joiner'),
        ('XX', 'Unknown'),
        ('ZW', 'ZWSpace'),
        ('ZWJ', 'ZWJ'),
    ], min='9.0.0')
    prop.make_enum(g, ['NFC_QC', 'NFC_Quick_Check'], [
        ('M', 'Maybe'),
        ('N', 'No'),
        ('Y', 'Yes'),
    ], min='9.0.0')
    prop.make_enum(g, ['NFD_QC', 'NFD_Quick_Check'], [
        ('N', 'No'),
        ('Y', 'Yes'),
    ], min='9.0.0')
    prop.make_enum(g, ['NFKC_QC', 'NFKC_Quick_Check'], [
        ('M', 'Maybe'),
        ('N', 'No'),
        ('Y', 'Yes'),
    ], min='9.0.0')
    prop.make_enum(g, ['NFKD_QC', 'NFKD_Quick_Check'], [
        ('N', 'No'),
        ('Y', 'Yes'),
    ], min='9.0.0')
    prop.make_enum(g, ['nt', 'Numeric_Type'], [
        ('De', 'Decimal'),
        ('Di', 'Digit'),
        ('None', 'None'),
        ('Nu', 'Numeric'),
    ], min='9.0.0')
    prop.make_enum(g, ['SB', 'Sentence_Break'], [
        ('AT', 'ATerm'),
        ('CL', 'Close'),
        ('CR', 'CR'),
        ('EX', 'Extend'),
        ('FO', 'Format'),
        ('LE', 'OLetter'),
        ('LF', 'LF'),
        ('LO', 'Lower'),
        ('NU', 'Numeric'),
        ('SC', 'SContinue'),
        ('SE', 'Sep'),
        ('SP', 'Sp'),
        ('ST', 'STerm'),
        ('UP', 'Upper'),
        ('XX', 'Other'),
    ], min='9.0.0')
    prop.make_enum(g, ['WB', 'Word_Break'], [
        ('CR', 'CR'),
        ('DQ', 'Double_Quote'),
        ('EB', 'E_Base'),
        ('EBG', 'E_Base_GAZ'),
        ('EM', 'E_Modifier'),
        ('EX', 'ExtendNumLet'),
        ('Extend', 'Extend'),
        ('FO', 'Format'),
        ('GAZ', 'Glue_After_Zwj'),
        ('HL', 'Hebrew_Letter'),
        ('KA', 'Katakana'),
        ('LE', 'ALetter'),
        ('LF', 'LF'),
        ('MB', 'MidNumLet'),
        ('ML', 'MidLetter'),
        ('MN', 'MidNum'),
        ('NL', 'Newline'),
        ('NU', 'Numeric'),
        ('RI', 'Regional_Indicator'),
        ('SQ', 'Single_Quote'),
        ('XX', 'Other'),
        ('ZWJ', 'ZWJ'),
    ], min='9.0.0')
    # Binary Properties
    prop.make_bool(g, ['AHex', 'ASCII_Hex_Digit'], min='9.0.0')
    prop.make_bool(g, ['Alpha', 'Alphabetic'], min='9.0.0')
    prop.make_bool(g, ['Bidi_C', 'Bidi_Control'], min='9.0.0')
    prop.make_bool(g, ['Bidi_M', 'Bidi_Mirrored'], min='9.0.0')
    prop.make_bool(g, ['Cased', 'Cased'], min='9.0.0')
    prop.make_bool(g, ['CE', 'Composition_Exclusion'], min='9.0.0')
    prop.make_bool(g, ['CI', 'Case_Ignorable'], min='9.0.0')
    prop.make_bool(g, ['Comp_Ex', 'Full_Composition_Exclusion'], min='9.0.0')
    prop.make_bool(g, ['CWCF', 'Changes_When_Casefolded'], min='9.0.0')
    prop.make_bool(g, ['CWCM', 'Changes_When_Casemapped'], min='9.0.0')
    prop.make_bool(g, ['CWKCF', 'Changes_When_NFKC_Casefolded'], min='9.0.0')
    prop.make_bool(g, ['CWL', 'Changes_When_Lowercased'], min='9.0.0')
    prop.make_bool(g, ['CWT', 'Changes_When_Titlecased'], min='9.0.0')
    prop.make_bool(g, ['CWU', 'Changes_When_Uppercased'], min='9.0.0')
    prop.make_bool(g, ['Dash', 'Dash'], min='9.0.0')
    prop.make_bool(g, ['Dep', 'Deprecated'], min='9.0.0')
    prop.make_bool(g, ['DI', 'Default_Ignorable_Code_Point'], min='9.0.0')
    prop.make_bool(g, ['Dia', 'Diacritic'], min='9.0.0')
    prop.make_bool(g, ['Ext', 'Extender'], min='9.0.0')
    prop.make_bool(g, ['Gr_Base', 'Grapheme_Base'], min='9.0.0')
    prop.make_bool(g, ['Gr_Ext', 'Grapheme_Extend'], min='9.0.0')
    prop.make_bool(g, ['Gr_Link', 'Grapheme_Link'], min='9.0.0')
    prop.make_bool(g, ['Hex', 'Hex_Digit'], min='9.0.0')
    prop.make_bool(g, ['Hyphen', 'Hyphen'], min='9.0.0')
    prop.make_bool(g, ['IDC', 'ID_Continue'], min='9.0.0')
    prop.make_bool(g, ['Ideo', 'Ideographic'], min='9.0.0')
    prop.make_bool(g, ['IDS', 'ID_Start'], min='9.0.0')
    prop.make_bool(g, ['IDSB', 'IDS_Binary_Operator'], min='9.0.0')
    prop.make_bool(g, ['IDST', 'IDS_Trinary_Operator'], min='9.0.0')
    prop.make_bool(g, ['Join_C', 'Join_Control'], min='9.0.0')
    prop.make_bool(g, ['LOE', 'Logical_Order_Exception'], min='9.0.0')
    prop.make_bool(g, ['Lower', 'Lowercase'], min='9.0.0')
    prop.make_bool(g, ['Math', 'Math'], min='9.0.0')
    prop.make_bool(g, ['NChar', 'Noncharacter_Code_Point'], min='9.0.0')
    prop.make_bool(g, ['OAlpha', 'Other_Alphabetic'], min='9.0.0')
    prop.make_bool(g, ['ODI', 'Other_Default_Ignorable_Code_Point'], min='9.0.0')
    prop.make_bool(g, ['OGr_Ext', 'Other_Grapheme_Extend'], min='9.0.0')
    prop.make_bool(g, ['OIDC', 'Other_ID_Continue'], min='9.0.0')
    prop.make_bool(g, ['OIDS', 'Other_ID_Start'], min='9.0.0')
    prop.make_bool(g, ['OLower', 'Other_Lowercase'], min='9.0.0')
    prop.make_bool(g, ['OMath', 'Other_Math'], min='9.0.0')
    prop.make_bool(g, ['OUpper', 'Other_Uppercase'], min='9.0.0')
    prop.make_bool(g, ['Pat_Syn', 'Pattern_Syntax'], min='9.0.0')
    prop.make_bool(g, ['Pat_WS', 'Pattern_White_Space'], min='9.0.0')
    prop.make_bool(g, ['PCM', 'Prepended_Concatenation_Mark'], min='9.0.0')
    prop.make_bool(g, ['QMark', 'Quotation_Mark'], min='9.0.0')
    prop.make_bool(g, ['Radical', 'Radical'], min='9.0.0')
    prop.make_bool(g, ['SD', 'Soft_Dotted'], min='9.0.0')
    prop.make_bool(g, ['STerm', 'Sentence_Terminal'], min='9.0.0')
    prop.make_bool(g, ['Term', 'Terminal_Punctuation'], min='9.0.0')
    prop.make_bool(g, ['UIdeo', 'Unified_Ideograph'], min='9.0.0')
    prop.make_bool(g, ['Upper', 'Uppercase'], min='9.0.0')
    prop.make_bool(g, ['VS', 'Variation_Selector'], min='9.0.0')
    prop.make_bool(g, ['WSpace', 'White_Space', 'space'], min='9.0.0')
    prop.make_bool(g, ['XIDC', 'XID_Continue'], min='9.0.0')
    prop.make_bool(g, ['XIDS', 'XID_Start'], min='9.0.0')
    prop.make_bool(g, ['XO_NFC', 'Expands_On_NFC'], min='9.0.0')
    prop.make_bool(g, ['XO_NFD', 'Expands_On_NFD'], min='9.0.0')
    prop.make_bool(g, ['XO_NFKC', 'Expands_On_NFKC'], min='9.0.0')
    prop.make_bool(g, ['XO_NFKD', 'Expands_On_NFKD'], min='9.0.0')
_init_classes()
''' Fixed classes used to support uni.alias and its dynamic classes.
'''
import decimal, fractions
from functools import partial
from . import _util
def _split(s):
    ''' Break a string into its whitespace-separated tokens.

        Asserts that splitting on single spaces (ignoring empty pieces)
        yields the same set of tokens as splitting on any whitespace.
    '''
    tokens = s.split()
    space_pieces = {piece for piece in s.split(' ') if piece != ''}
    assert set(tokens) == space_pieces
    return tokens
class _Property:
    ''' Root of every exposed property class.

        Only subclasses of _InstantiableProperty may be instantiated; for
        any other class, construction raises NotImplementedError.
    '''

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are accepted here and ignored.
        if issubclass(cls, _InstantiableProperty):
            return object.__new__(cls)
        raise NotImplementedError('most property classes are not instantiable')

    def __init__(self):
        super().__init__()

    @classmethod
    def check(cls, val):
        ''' Validate a value; concrete subclasses must override this. '''
        raise NotImplementedError('Needs to be implemented in subclasses!')

    @classmethod
    def convert(cls, val):
        ''' Convert a value; concrete subclasses must override this. '''
        raise NotImplementedError('Needs to be implemented in subclasses!')
# TODO actually optimize using bitsets when possible.
class Set(_Property):
    ''' Properties whose values are a set of 0 or more of another property.

        New subclasses are created dynamically by make_set, which supplies
        the ``element`` class attribute (the property class of each member).
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a set whose members all pass element.check. '''
        assert issubclass(cls.element, _Property)
        assert isinstance(val, set)
        for v in val:
            # No property class defines a `filter` method, so validate each
            # member through the element class's own check() instead.
            cls.element.check(v)

    @classmethod
    def convert(cls, val):
        ''' Parse a space-separated string of element values into a set. '''
        assert issubclass(cls.element, _Property)
        assert isinstance(val, str)
        rvl = [cls.element.convert(v) for v in _split(val)]
        rv = set(rvl)
        # A repeated element would silently collapse in the set; reject it.
        assert len(rv) == len(rvl)
        return rv
class _EnumLikeProperty(_Property):
    ''' Shared behaviour for properties drawn from a fixed pool of values.

        Subclasses provide ``value_set`` (the legal values) and
        ``value_by_name`` (a mapping from each accepted name to its value).
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is an instance of cls and one of its values. '''
        assert isinstance(val, cls)
        assert val in cls.value_set

    @classmethod
    def convert(cls, val):
        ''' Look up the value registered under the name val. '''
        assert isinstance(val, str)
        return cls.value_by_name[val]
class _InstantiableProperty(_EnumLikeProperty):
    ''' Base for properties whose values are instances of the class itself.

        New subclasses are created dynamically by make_enum.
    '''

    def __init__(self, index, aliases, *, manual_index):
        # When manual_index is true the names start one slot later, because
        # slot 0 is then occupied by something other than the short name.
        first = manual_index + 0
        self.value_aliases = aliases
        self.short_value_name = aliases[first]
        self.long_value_name = aliases[first + 1]

    def __repr__(self):
        return '%s.%s' % (type(self).__name__, self.long_value_name)
class Enum(_InstantiableProperty):
    ''' Property whose set of values rarely grows in future versions.

        Because of that, its values can meaningfully be used with bitsets.
    '''

    def __init__(self, index, aliases, *, manual_index):
        super().__init__(index, aliases, manual_index=manual_index)
        # With manual_index, the first alias carries the index as an integer
        # string; otherwise the caller-supplied positional index is used.
        self.index = int(aliases[0]) if manual_index else index
class Catalog(_InstantiableProperty):
    ''' Property whose set of values commonly grows in future versions.

        Because of that, its values *cannot* meaningfully be used with
        bitsets.
    '''
class Bool(_EnumLikeProperty):
    ''' Property whose values are true/false.

        Values are the builtin ``True``/``False``; the accepted spellings
        are listed in ``value_by_name``.
    '''
    value_by_name = {
        k: v
        for keys, v in [
            (('N', 'No', 'F', 'False'), False),
            (('Y', 'Yes', 'T', 'True'), True),
        ]
        for k in keys
    }
    value_set = set(value_by_name.values())
    value_list = [False, True]

    @classmethod
    def check(cls, val):
        ''' Assert that val is a builtin bool. '''
        # The inherited check requires isinstance(val, cls), which can never
        # hold here because convert() returns plain True/False.
        assert isinstance(val, bool)
        assert val in cls.value_set
class Int(_Property):
    ''' Property whose values are integers, written in base 10.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is an int. '''
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        ''' Parse the decimal string val into an int. '''
        assert isinstance(val, str)
        parsed = int(val)
        return parsed
class Rational(_Property):
    ''' Property whose values are ratios of integers, written with a slash.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a fractions.Fraction. '''
        assert isinstance(val, fractions.Fraction)

    @classmethod
    def convert(cls, val):
        ''' Parse the string val into a fractions.Fraction. '''
        assert isinstance(val, str)
        ratio = fractions.Fraction(val)
        return ratio
class Decimal(_Property):
    ''' Property whose values are ratios of integers, written as decimals.

        May be inexact.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a decimal.Decimal. '''
        assert isinstance(val, decimal.Decimal)

    @classmethod
    def convert(cls, val):
        ''' Parse the string val into a decimal.Decimal. '''
        assert isinstance(val, str)
        number = decimal.Decimal(val)
        return number
class RawString(_Property):
    ''' Property whose values are plain strings, stored without any encoding.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a str. '''
        assert isinstance(val, str)

    @classmethod
    def convert(cls, val):
        ''' Return val unchanged after asserting that it is a str. '''
        assert isinstance(val, str)
        return val
class Codepoint(_Property):
    ''' Property whose values are single codepoints.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is an int. '''
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        ''' Parse a string of 4 to 6 hexadecimal digits into an int. '''
        assert isinstance(val, str)
        digits = len(val)
        assert digits >= 4 and digits <= 6
        return int(val, 16)
class CodepointRange(_Property):
    ''' Property whose values are ranges of codepoints.

        A converted value is a ``(low, high)`` pair of ints.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a 2-tuple of ints. '''
        assert isinstance(val, tuple)
        assert len(val) == 2
        first, last = val
        assert isinstance(first, int)
        assert isinstance(last, int)

    @classmethod
    def convert(cls, val):
        ''' Parse "LOW..HIGH", or a lone codepoint, into a (low, high) pair. '''
        assert isinstance(val, str)
        if '..' not in val:
            # A lone codepoint denotes a range of exactly one.
            low = high = val
        else:
            low, high = val.split('..')
            # A one-codepoint range must be written without the dots.
            assert low != high
        return Codepoint.convert(low), Codepoint.convert(high)
class CodepointSequence(_Property):
    ''' Property whose values are strings, encoded as a sequence of codepoints.

        The encoded form is a space-separated list of hexadecimal codepoints.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is a str. '''
        assert isinstance(val, str)

    @classmethod
    def convert(cls, val):
        ''' Decode a space-separated list of hex codepoints into a str. '''
        assert isinstance(val, str)
        # _split yields hex *strings*; each one has to be parsed to an int
        # before chr() can turn it into a character.
        return ''.join(chr(Codepoint.convert(cp)) for cp in _split(val))
class U_Codepoint(_Property):
    ''' Property whose values are single codepoints, encoded with U+ out front.
    '''

    @classmethod
    def check(cls, val):
        ''' Assert that val is an int. '''
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        ''' Parse a "U+XXXX" string into an int codepoint. '''
        assert isinstance(val, str)
        # The prefix test must be made on the value itself; calling
        # str.startswith('U+') unbound raises TypeError.
        assert val.startswith('U+')
        return Codepoint.convert(val[2:])
def unique_aliases(aliases):
    ''' Return the aliases as a list with exact repeats dropped.

        The first occurrence of each alias is kept and order is preserved.
        An exact repeat is only tolerated (by assertion) at position 1 when
        every alias case-folds to the same string, or at position 2 of a
        three-alias list when the alias starts with 'CCC'.  An alias that
        differs from an earlier one only by case is kept, but its folded
        form must be the folded form of one of the first two aliases.
    '''
    names = list(aliases)
    folded = [name.casefold() for name in names]
    exact_seen = set()
    folded_seen = set()
    kept = []
    for pos, (name, fold) in enumerate(zip(names, folded)):
        if name in exact_seen:
            # Exact repeat: verify it is one of the expected shapes, then
            # leave it out of the result.
            assert (pos == 1 and len(set(folded)) == 1) or (pos == 2 and len(names) == 3 and name.startswith('CCC')), name
        else:
            if fold in folded_seen:
                assert fold in folded[:2], fold
            kept.append(name)
        exact_seen.add(name)
        folded_seen.add(fold)
    return kept
def _make_aliases(g, aliases, cls):
    ''' Bind every distinct alias to cls in the mapping g.

        Asserts that none of the aliases is already present in g.
    '''
    for name in unique_aliases(aliases):
        assert name not in g, name
        g[name] = cls
def _make_class(g, aliases, cls, *, doc=None, min, max='9999.0.0'):
    ''' Finish a freshly created property class and publish it in g.

        g is a module-style namespace that must hold '__name__' and
        '__all__'.  aliases[0] becomes the short name and aliases[1] the
        long name; the long name also becomes the class's __name__.  min
        and max are stored as the supported Unicode version bounds.
    '''
    # TODO make doc mandatory
    cls.__module__ = g['__name__']
    cls.__doc__ = doc
    cls.min_unicode_version = min
    cls.max_unicode_version = max
    cls.aliases = aliases
    cls.short_name = aliases[0]
    cls.long_name = aliases[1]
    public_name = cls.long_name
    cls.__qualname__ = public_name
    cls.__name__ = public_name
    # Expose the class under each of its aliases, exporting the long name.
    _make_aliases(g, aliases, cls)
    g['__all__'].append(cls.__name__)
def _make_subclass(g, aliases, cls, **kwargs):
    ''' Create an empty subclass of cls and register it via _make_class.

        Remaining keyword arguments are forwarded to _make_class.
    '''
    class Derived(cls):
        # Placeholder name; _make_class renames the class.
        pass

    _make_class(g, aliases, Derived, **kwargs)
def make_set(g, aliases, elem, **kwargs):
    ''' Create and register a Set property whose members are of class elem.

        Remaining keyword arguments are forwarded to _make_class.
    '''
    class ElementSet(Set):
        # Placeholder name; _make_class renames the class.
        element = elem

    _make_class(g, aliases, ElementSet, **kwargs)
def make_enum(g, aliases, values, *, catalog=False, manual_index=False, **kwargs):
    ''' Create and register an enumerated property class with its values.

        values is an iterable of alias sequences, one per value.  The class
        derives from Catalog when catalog is true, otherwise from Enum.
        Each value is instantiated, appended to value_list, and made
        reachable by every alias both through value_by_name and as a class
        attribute.  Remaining keyword arguments go to _make_class.
    '''
    base = Catalog if catalog else Enum

    class E(base):
        value_by_name = {}
        value_set = set()
        value_list = []

    _make_class(g, aliases, E, **kwargs)
    class_attrs = None
    for position, value_aliases in enumerate(values):
        member = E(position, value_aliases, manual_index=manual_index)
        E.value_list.append(member)
        _make_aliases(E.value_by_name, value_aliases, member)
        # A fresh ClassDict per value lets the aliases be set as attributes.
        class_attrs = _util.ClassDict(E)
        _make_aliases(class_attrs, value_aliases, member)
    E.value_set = set(E.value_by_name.values())
# Convenience factories: each one is _make_subclass with the base class
# pre-bound, so callers supply only (g, aliases, **kwargs).
make_bool = partial(_make_subclass, cls=Bool)
make_int = partial(_make_subclass, cls=Int)
make_rational = partial(_make_subclass, cls=Rational)
make_decimal = partial(_make_subclass, cls=Decimal)
make_raw = partial(_make_subclass, cls=RawString)
make_code = partial(_make_subclass, cls=Codepoint)
make_code_range = partial(_make_subclass, cls=CodepointRange)
make_code_seq = partial(_make_subclass, cls=CodepointSequence)
make_u_code = partial(_make_subclass, cls=U_Codepoint)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment