Created
March 28, 2017 15:58
-
-
Save o11c/84e8f05a29dfdee3503f6bb0fc6f0f4e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import collections.abc | |
class ClassDict(collections.abc.MutableMapping):
    ''' Wrapper for cls.__dict__ that supports assignment.

    Lookup, iteration and length are answered from ``cls.__dict__``.
    Item assignment and deletion are routed through ``setattr`` and
    ``delattr`` on ``cls`` itself, because the ``__dict__`` of a class
    is a read-only proxy that cannot be written to directly.
    '''

    def __init__(self, cls):
        ''' Wrap ``cls``; its ``__dict__`` is referenced, not copied.
        '''
        self.cls = cls
        self.dct = cls.__dict__

    def __getitem__(self, k):
        ''' Return the entry stored under ``k`` in ``cls.__dict__``.

        Raises KeyError if ``k`` is not present.
        '''
        return self.dct[k]

    def __setitem__(self, k, v):
        ''' Set attribute ``k`` on the wrapped class to ``v``.
        '''
        setattr(self.cls, k, v)

    def __delitem__(self, k):
        ''' Delete attribute ``k`` from the wrapped class.

        Raises KeyError if ``k`` is not present in ``cls.__dict__``.
        '''
        # delattr() reports a missing name as AttributeError, but the
        # mapping protocol (and __getitem__ above) uses KeyError, so
        # check membership first and raise the expected exception.
        if k not in self.dct:
            raise KeyError(k)
        delattr(self.cls, k)

    def __iter__(self):
        ''' Iterate over the names in ``cls.__dict__``.
        '''
        return iter(self.dct)

    def __len__(self):
        ''' Return the number of entries in ``cls.__dict__``.
        '''
        return len(self.dct)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Contents of PropertyAliases.txt and PropertyValueAliases.txt are | |
| # hard-coded here, then just checked during UCD loading. | |
| # | |
| # Also other internal stuff! This turns out to be really useful. | |
| # (there's no reason a "property" has to apply to a single codepoint) | |
| from . import prop | |
| __all__ = [] | |
| def _init_classes(): | |
| g = globals() | |
| # implementation internals | |
| prop.make_raw(g, ['X_Schematic_Name', 'X_Schematic_Name'], min='9999.0.0') | |
| prop.make_enum(g, ['X_Shaping_Environment', 'X_Shaping_Environment'], [ | |
| ('isolate', 'isolate'), | |
| ('initial', 'initial'), | |
| ('medial', 'medial'), | |
| ('final', 'final'), | |
| ], min='9999.0.0') | |
| prop.make_set(g, ['X_Shaping_Environment_Set', 'X_Shaping_Environment_Set'], X_Shaping_Environment, min='9999.0.0') | |
| prop.make_enum(g, ['X_Case_Folding_Status', 'X_Case_Folding_Status'], [ | |
| ('C', 'Common'), | |
| ('F', 'Full'), | |
| ('S', 'Simple'), | |
| ('T', 'Turkic'), | |
| ], min='9999.0.0') | |
| # Numeric Properties | |
| prop.make_int(g, ['cjkAccountingNumeric', 'kAccountingNumeric'], min='9.0.0') | |
| prop.make_int(g, ['cjkOtherNumeric', 'kOtherNumeric'], min='9.0.0') | |
| prop.make_int(g, ['cjkPrimaryNumeric', 'kPrimaryNumeric'], min='9.0.0') | |
| prop.make_rational(g, ['nv', 'Numeric_Value'], min='9.0.0') | |
| # String Properties | |
| prop.make_code_seq(g, ['cf', 'Case_Folding'], min='9.0.0') | |
| prop.make_u_code(g, ['cjkCompatibilityVariant', 'kCompatibilityVariant'], min='9.0.0') | |
| prop.make_code_seq(g, ['dm', 'Decomposition_Mapping'], min='9.0.0') # stored in file with <dt> ahead | |
| prop.make_code_seq(g, ['FC_NFKC', 'FC_NFKC_Closure'], min='9.0.0') | |
| prop.make_code_seq(g, ['lc', 'Lowercase_Mapping'], min='9.0.0') | |
| prop.make_code_seq(g, ['NFKC_CF', 'NFKC_Casefold'], min='9.0.0') | |
| prop.make_code(g, ['scf', 'Simple_Case_Folding', 'sfc'], min='9.0.0') | |
| prop.make_code(g, ['slc', 'Simple_Lowercase_Mapping'], min='9.0.0') | |
| prop.make_code(g, ['stc', 'Simple_Titlecase_Mapping'], min='9.0.0') | |
| prop.make_code(g, ['suc', 'Simple_Uppercase_Mapping'], min='9.0.0') | |
| prop.make_code_seq(g, ['tc', 'Titlecase_Mapping'], min='9.0.0') | |
| prop.make_code_seq(g, ['uc', 'Uppercase_Mapping'], min='9.0.0') | |
| # Miscellaneous Properties | |
| prop.make_code(g, ['bmg', 'Bidi_Mirroring_Glyph'], min='9.0.0') | |
| prop.make_code(g, ['bpb', 'Bidi_Paired_Bracket'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIICore', 'kIICore'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_GSource', 'kIRG_GSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_HSource', 'kIRG_HSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_JSource', 'kIRG_JSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_KPSource', 'kIRG_KPSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_KSource', 'kIRG_KSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_MSource', 'kIRG_MSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_TSource', 'kIRG_TSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_USource', 'kIRG_USource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkIRG_VSource', 'kIRG_VSource'], min='9.0.0') | |
| prop.make_raw(g, ['cjkRSUnicode', 'kRSUnicode', 'Unicode_Radical_Stroke', 'URS'], min='9.0.0') | |
| prop.make_raw(g, ['isc', 'ISO_Comment'], min='9.0.0') | |
| prop.make_enum(g, ['JSN', 'Jamo_Short_Name'], [ | |
| # This special case is not listed in PropertyValueAliases.txt, | |
| # but is used in the Jamo file. | |
| ('', ''), | |
| ('A', 'A'), | |
| ('AE', 'AE'), | |
| ('B', 'B'), | |
| ('BB', 'BB'), | |
| ('BS', 'BS'), | |
| ('C', 'C'), | |
| ('D', 'D'), | |
| ('DD', 'DD'), | |
| ('E', 'E'), | |
| ('EO', 'EO'), | |
| ('EU', 'EU'), | |
| ('G', 'G'), | |
| ('GG', 'GG'), | |
| ('GS', 'GS'), | |
| ('H', 'H'), | |
| ('I', 'I'), | |
| ('J', 'J'), | |
| ('JJ', 'JJ'), | |
| ('K', 'K'), | |
| ('L', 'L'), | |
| ('LB', 'LB'), | |
| ('LG', 'LG'), | |
| ('LH', 'LH'), | |
| ('LM', 'LM'), | |
| ('LP', 'LP'), | |
| ('LS', 'LS'), | |
| ('LT', 'LT'), | |
| ('M', 'M'), | |
| ('N', 'N'), | |
| ('NG', 'NG'), | |
| ('NH', 'NH'), | |
| ('NJ', 'NJ'), | |
| ('O', 'O'), | |
| ('OE', 'OE'), | |
| ('P', 'P'), | |
| ('R', 'R'), | |
| ('S', 'S'), | |
| ('SS', 'SS'), | |
| ('T', 'T'), | |
| ('U', 'U'), | |
| ('WA', 'WA'), | |
| ('WAE', 'WAE'), | |
| ('WE', 'WE'), | |
| ('WEO', 'WEO'), | |
| ('WI', 'WI'), | |
| ('YA', 'YA'), | |
| ('YAE', 'YAE'), | |
| ('YE', 'YE'), | |
| ('YEO', 'YEO'), | |
| ('YI', 'YI'), | |
| ('YO', 'YO'), | |
| ('YU', 'YU'), | |
| ], min='9.0.0') | |
| prop.make_raw(g, ['na', 'Name'], min='9.0.0') | |
| prop.make_raw(g, ['na1', 'Unicode_1_Name'], min='9.0.0') | |
| prop.make_raw(g, ['Name_Alias', 'Name_Alias'], min='9.0.0') | |
| # Moved to be *after* Script | |
| #prop.make_set(g, ['scx', 'Script_Extensions'], Script, min='9.0.0') | |
| # Catalog Properties | |
| prop.make_enum(g, ['age', 'Age'], [ | |
| ('1.1', 'V1_1'), | |
| ('2.0', 'V2_0'), | |
| ('2.1', 'V2_1'), | |
| ('3.0', 'V3_0'), | |
| ('3.1', 'V3_1'), | |
| ('3.2', 'V3_2'), | |
| ('4.0', 'V4_0'), | |
| ('4.1', 'V4_1'), | |
| ('5.0', 'V5_0'), | |
| ('5.1', 'V5_1'), | |
| ('5.2', 'V5_2'), | |
| ('6.0', 'V6_0'), | |
| ('6.1', 'V6_1'), | |
| ('6.2', 'V6_2'), | |
| ('6.3', 'V6_3'), | |
| ('7.0', 'V7_0'), | |
| ('8.0', 'V8_0'), | |
| ('9.0', 'V9_0'), | |
| ('NA', 'Unassigned'), | |
| ], catalog=True, min='9.0.0') | |
| prop.make_enum(g, ['blk', 'Block'], [ | |
| ('Adlam', 'Adlam'), | |
| ('Aegean_Numbers', 'Aegean_Numbers'), | |
| ('Ahom', 'Ahom'), | |
| ('Alchemical', 'Alchemical_Symbols'), | |
| ('Alphabetic_PF', 'Alphabetic_Presentation_Forms'), | |
| ('Anatolian_Hieroglyphs', 'Anatolian_Hieroglyphs'), | |
| ('Ancient_Greek_Music', 'Ancient_Greek_Musical_Notation'), | |
| ('Ancient_Greek_Numbers', 'Ancient_Greek_Numbers'), | |
| ('Ancient_Symbols', 'Ancient_Symbols'), | |
| ('Arabic', 'Arabic'), | |
| ('Arabic_Ext_A', 'Arabic_Extended_A'), | |
| ('Arabic_Math', 'Arabic_Mathematical_Alphabetic_Symbols'), | |
| ('Arabic_PF_A', 'Arabic_Presentation_Forms_A', 'Arabic_Presentation_Forms-A'), | |
| ('Arabic_PF_B', 'Arabic_Presentation_Forms_B'), | |
| ('Arabic_Sup', 'Arabic_Supplement'), | |
| ('Armenian', 'Armenian'), | |
| ('Arrows', 'Arrows'), | |
| ('ASCII', 'Basic_Latin'), | |
| ('Avestan', 'Avestan'), | |
| ('Balinese', 'Balinese'), | |
| ('Bamum', 'Bamum'), | |
| ('Bamum_Sup', 'Bamum_Supplement'), | |
| ('Bassa_Vah', 'Bassa_Vah'), | |
| ('Batak', 'Batak'), | |
| ('Bengali', 'Bengali'), | |
| ('Bhaiksuki', 'Bhaiksuki'), | |
| ('Block_Elements', 'Block_Elements'), | |
| ('Bopomofo', 'Bopomofo'), | |
| ('Bopomofo_Ext', 'Bopomofo_Extended'), | |
| ('Box_Drawing', 'Box_Drawing'), | |
| ('Brahmi', 'Brahmi'), | |
| ('Braille', 'Braille_Patterns'), | |
| ('Buginese', 'Buginese'), | |
| ('Buhid', 'Buhid'), | |
| ('Byzantine_Music', 'Byzantine_Musical_Symbols'), | |
| ('Carian', 'Carian'), | |
| ('Caucasian_Albanian', 'Caucasian_Albanian'), | |
| ('Chakma', 'Chakma'), | |
| ('Cham', 'Cham'), | |
| ('Cherokee', 'Cherokee'), | |
| ('Cherokee_Sup', 'Cherokee_Supplement'), | |
| ('CJK', 'CJK_Unified_Ideographs'), | |
| ('CJK_Compat', 'CJK_Compatibility'), | |
| ('CJK_Compat_Forms', 'CJK_Compatibility_Forms'), | |
| ('CJK_Compat_Ideographs', 'CJK_Compatibility_Ideographs'), | |
| ('CJK_Compat_Ideographs_Sup', 'CJK_Compatibility_Ideographs_Supplement'), | |
| ('CJK_Ext_A', 'CJK_Unified_Ideographs_Extension_A'), | |
| ('CJK_Ext_B', 'CJK_Unified_Ideographs_Extension_B'), | |
| ('CJK_Ext_C', 'CJK_Unified_Ideographs_Extension_C'), | |
| ('CJK_Ext_D', 'CJK_Unified_Ideographs_Extension_D'), | |
| ('CJK_Ext_E', 'CJK_Unified_Ideographs_Extension_E'), | |
| ('CJK_Radicals_Sup', 'CJK_Radicals_Supplement'), | |
| ('CJK_Strokes', 'CJK_Strokes'), | |
| ('CJK_Symbols', 'CJK_Symbols_And_Punctuation'), | |
| ('Compat_Jamo', 'Hangul_Compatibility_Jamo'), | |
| ('Control_Pictures', 'Control_Pictures'), | |
| ('Coptic', 'Coptic'), | |
| ('Coptic_Epact_Numbers', 'Coptic_Epact_Numbers'), | |
| ('Counting_Rod', 'Counting_Rod_Numerals'), | |
| ('Cuneiform', 'Cuneiform'), | |
| ('Cuneiform_Numbers', 'Cuneiform_Numbers_And_Punctuation'), | |
| ('Currency_Symbols', 'Currency_Symbols'), | |
| ('Cypriot_Syllabary', 'Cypriot_Syllabary'), | |
| ('Cyrillic', 'Cyrillic'), | |
| ('Cyrillic_Ext_A', 'Cyrillic_Extended_A'), | |
| ('Cyrillic_Ext_B', 'Cyrillic_Extended_B'), | |
| ('Cyrillic_Ext_C', 'Cyrillic_Extended_C'), | |
| ('Cyrillic_Sup', 'Cyrillic_Supplement', 'Cyrillic_Supplementary'), | |
| ('Deseret', 'Deseret'), | |
| ('Devanagari', 'Devanagari'), | |
| ('Devanagari_Ext', 'Devanagari_Extended'), | |
| ('Diacriticals', 'Combining_Diacritical_Marks'), | |
| ('Diacriticals_Ext', 'Combining_Diacritical_Marks_Extended'), | |
| ('Diacriticals_For_Symbols', 'Combining_Diacritical_Marks_For_Symbols', 'Combining_Marks_For_Symbols'), | |
| ('Diacriticals_Sup', 'Combining_Diacritical_Marks_Supplement'), | |
| ('Dingbats', 'Dingbats'), | |
| ('Domino', 'Domino_Tiles'), | |
| ('Duployan', 'Duployan'), | |
| ('Early_Dynastic_Cuneiform', 'Early_Dynastic_Cuneiform'), | |
| ('Egyptian_Hieroglyphs', 'Egyptian_Hieroglyphs'), | |
| ('Elbasan', 'Elbasan'), | |
| ('Emoticons', 'Emoticons'), | |
| ('Enclosed_Alphanum', 'Enclosed_Alphanumerics'), | |
| ('Enclosed_Alphanum_Sup', 'Enclosed_Alphanumeric_Supplement'), | |
| ('Enclosed_CJK', 'Enclosed_CJK_Letters_And_Months'), | |
| ('Enclosed_Ideographic_Sup', 'Enclosed_Ideographic_Supplement'), | |
| ('Ethiopic', 'Ethiopic'), | |
| ('Ethiopic_Ext', 'Ethiopic_Extended'), | |
| ('Ethiopic_Ext_A', 'Ethiopic_Extended_A'), | |
| ('Ethiopic_Sup', 'Ethiopic_Supplement'), | |
| ('Geometric_Shapes', 'Geometric_Shapes'), | |
| ('Geometric_Shapes_Ext', 'Geometric_Shapes_Extended'), | |
| ('Georgian', 'Georgian'), | |
| ('Georgian_Sup', 'Georgian_Supplement'), | |
| ('Glagolitic', 'Glagolitic'), | |
| ('Glagolitic_Sup', 'Glagolitic_Supplement'), | |
| ('Gothic', 'Gothic'), | |
| ('Grantha', 'Grantha'), | |
| ('Greek', 'Greek_And_Coptic'), | |
| ('Greek_Ext', 'Greek_Extended'), | |
| ('Gujarati', 'Gujarati'), | |
| ('Gurmukhi', 'Gurmukhi'), | |
| ('Half_And_Full_Forms', 'Halfwidth_And_Fullwidth_Forms'), | |
| ('Half_Marks', 'Combining_Half_Marks'), | |
| ('Hangul', 'Hangul_Syllables'), | |
| ('Hanunoo', 'Hanunoo'), | |
| ('Hatran', 'Hatran'), | |
| ('Hebrew', 'Hebrew'), | |
| ('High_PU_Surrogates', 'High_Private_Use_Surrogates'), | |
| ('High_Surrogates', 'High_Surrogates'), | |
| ('Hiragana', 'Hiragana'), | |
| ('IDC', 'Ideographic_Description_Characters'), | |
| ('Ideographic_Symbols', 'Ideographic_Symbols_And_Punctuation'), | |
| ('Imperial_Aramaic', 'Imperial_Aramaic'), | |
| ('Indic_Number_Forms', 'Common_Indic_Number_Forms'), | |
| ('Inscriptional_Pahlavi', 'Inscriptional_Pahlavi'), | |
| ('Inscriptional_Parthian', 'Inscriptional_Parthian'), | |
| ('IPA_Ext', 'IPA_Extensions'), | |
| ('Jamo', 'Hangul_Jamo'), | |
| ('Jamo_Ext_A', 'Hangul_Jamo_Extended_A'), | |
| ('Jamo_Ext_B', 'Hangul_Jamo_Extended_B'), | |
| ('Javanese', 'Javanese'), | |
| ('Kaithi', 'Kaithi'), | |
| ('Kana_Sup', 'Kana_Supplement'), | |
| ('Kanbun', 'Kanbun'), | |
| ('Kangxi', 'Kangxi_Radicals'), | |
| ('Kannada', 'Kannada'), | |
| ('Katakana', 'Katakana'), | |
| ('Katakana_Ext', 'Katakana_Phonetic_Extensions'), | |
| ('Kayah_Li', 'Kayah_Li'), | |
| ('Kharoshthi', 'Kharoshthi'), | |
| ('Khmer', 'Khmer'), | |
| ('Khmer_Symbols', 'Khmer_Symbols'), | |
| ('Khojki', 'Khojki'), | |
| ('Khudawadi', 'Khudawadi'), | |
| ('Lao', 'Lao'), | |
| ('Latin_1_Sup', 'Latin_1_Supplement', 'Latin_1'), | |
| ('Latin_Ext_A', 'Latin_Extended_A'), | |
| ('Latin_Ext_Additional', 'Latin_Extended_Additional'), | |
| ('Latin_Ext_B', 'Latin_Extended_B'), | |
| ('Latin_Ext_C', 'Latin_Extended_C'), | |
| ('Latin_Ext_D', 'Latin_Extended_D'), | |
| ('Latin_Ext_E', 'Latin_Extended_E'), | |
| ('Lepcha', 'Lepcha'), | |
| ('Letterlike_Symbols', 'Letterlike_Symbols'), | |
| ('Limbu', 'Limbu'), | |
| ('Linear_A', 'Linear_A'), | |
| ('Linear_B_Ideograms', 'Linear_B_Ideograms'), | |
| ('Linear_B_Syllabary', 'Linear_B_Syllabary'), | |
| ('Lisu', 'Lisu'), | |
| ('Low_Surrogates', 'Low_Surrogates'), | |
| ('Lycian', 'Lycian'), | |
| ('Lydian', 'Lydian'), | |
| ('Mahajani', 'Mahajani'), | |
| ('Mahjong', 'Mahjong_Tiles'), | |
| ('Malayalam', 'Malayalam'), | |
| ('Mandaic', 'Mandaic'), | |
| ('Manichaean', 'Manichaean'), | |
| ('Marchen', 'Marchen'), | |
| ('Math_Alphanum', 'Mathematical_Alphanumeric_Symbols'), | |
| ('Math_Operators', 'Mathematical_Operators'), | |
| ('Meetei_Mayek', 'Meetei_Mayek'), | |
| ('Meetei_Mayek_Ext', 'Meetei_Mayek_Extensions'), | |
| ('Mende_Kikakui', 'Mende_Kikakui'), | |
| ('Meroitic_Cursive', 'Meroitic_Cursive'), | |
| ('Meroitic_Hieroglyphs', 'Meroitic_Hieroglyphs'), | |
| ('Miao', 'Miao'), | |
| ('Misc_Arrows', 'Miscellaneous_Symbols_And_Arrows'), | |
| ('Misc_Math_Symbols_A', 'Miscellaneous_Mathematical_Symbols_A'), | |
| ('Misc_Math_Symbols_B', 'Miscellaneous_Mathematical_Symbols_B'), | |
| ('Misc_Pictographs', 'Miscellaneous_Symbols_And_Pictographs'), | |
| ('Misc_Symbols', 'Miscellaneous_Symbols'), | |
| ('Misc_Technical', 'Miscellaneous_Technical'), | |
| ('Modi', 'Modi'), | |
| ('Modifier_Letters', 'Spacing_Modifier_Letters'), | |
| ('Modifier_Tone_Letters', 'Modifier_Tone_Letters'), | |
| ('Mongolian', 'Mongolian'), | |
| ('Mongolian_Sup', 'Mongolian_Supplement'), | |
| ('Mro', 'Mro'), | |
| ('Multani', 'Multani'), | |
| ('Music', 'Musical_Symbols'), | |
| ('Myanmar', 'Myanmar'), | |
| ('Myanmar_Ext_A', 'Myanmar_Extended_A'), | |
| ('Myanmar_Ext_B', 'Myanmar_Extended_B'), | |
| ('Nabataean', 'Nabataean'), | |
| ('NB', 'No_Block'), | |
| ('New_Tai_Lue', 'New_Tai_Lue'), | |
| ('Newa', 'Newa'), | |
| ('NKo', 'NKo'), | |
| ('Number_Forms', 'Number_Forms'), | |
| ('OCR', 'Optical_Character_Recognition'), | |
| ('Ogham', 'Ogham'), | |
| ('Ol_Chiki', 'Ol_Chiki'), | |
| ('Old_Hungarian', 'Old_Hungarian'), | |
| ('Old_Italic', 'Old_Italic'), | |
| ('Old_North_Arabian', 'Old_North_Arabian'), | |
| ('Old_Permic', 'Old_Permic'), | |
| ('Old_Persian', 'Old_Persian'), | |
| ('Old_South_Arabian', 'Old_South_Arabian'), | |
| ('Old_Turkic', 'Old_Turkic'), | |
| ('Oriya', 'Oriya'), | |
| ('Ornamental_Dingbats', 'Ornamental_Dingbats'), | |
| ('Osage', 'Osage'), | |
| ('Osmanya', 'Osmanya'), | |
| ('Pahawh_Hmong', 'Pahawh_Hmong'), | |
| ('Palmyrene', 'Palmyrene'), | |
| ('Pau_Cin_Hau', 'Pau_Cin_Hau'), | |
| ('Phags_Pa', 'Phags_Pa'), | |
| ('Phaistos', 'Phaistos_Disc'), | |
| ('Phoenician', 'Phoenician'), | |
| ('Phonetic_Ext', 'Phonetic_Extensions'), | |
| ('Phonetic_Ext_Sup', 'Phonetic_Extensions_Supplement'), | |
| ('Playing_Cards', 'Playing_Cards'), | |
| ('Psalter_Pahlavi', 'Psalter_Pahlavi'), | |
| ('PUA', 'Private_Use_Area', 'Private_Use'), | |
| ('Punctuation', 'General_Punctuation'), | |
| ('Rejang', 'Rejang'), | |
| ('Rumi', 'Rumi_Numeral_Symbols'), | |
| ('Runic', 'Runic'), | |
| ('Samaritan', 'Samaritan'), | |
| ('Saurashtra', 'Saurashtra'), | |
| ('Sharada', 'Sharada'), | |
| ('Shavian', 'Shavian'), | |
| ('Shorthand_Format_Controls', 'Shorthand_Format_Controls'), | |
| ('Siddham', 'Siddham'), | |
| ('Sinhala', 'Sinhala'), | |
| ('Sinhala_Archaic_Numbers', 'Sinhala_Archaic_Numbers'), | |
| ('Small_Forms', 'Small_Form_Variants'), | |
| ('Sora_Sompeng', 'Sora_Sompeng'), | |
| ('Specials', 'Specials'), | |
| ('Sundanese', 'Sundanese'), | |
| ('Sundanese_Sup', 'Sundanese_Supplement'), | |
| ('Sup_Arrows_A', 'Supplemental_Arrows_A'), | |
| ('Sup_Arrows_B', 'Supplemental_Arrows_B'), | |
| ('Sup_Arrows_C', 'Supplemental_Arrows_C'), | |
| ('Sup_Math_Operators', 'Supplemental_Mathematical_Operators'), | |
| ('Sup_PUA_A', 'Supplementary_Private_Use_Area_A'), | |
| ('Sup_PUA_B', 'Supplementary_Private_Use_Area_B'), | |
| ('Sup_Punctuation', 'Supplemental_Punctuation'), | |
| ('Sup_Symbols_And_Pictographs', 'Supplemental_Symbols_And_Pictographs'), | |
| ('Super_And_Sub', 'Superscripts_And_Subscripts'), | |
| ('Sutton_SignWriting', 'Sutton_SignWriting'), | |
| ('Syloti_Nagri', 'Syloti_Nagri'), | |
| ('Syriac', 'Syriac'), | |
| ('Tagalog', 'Tagalog'), | |
| ('Tagbanwa', 'Tagbanwa'), | |
| ('Tags', 'Tags'), | |
| ('Tai_Le', 'Tai_Le'), | |
| ('Tai_Tham', 'Tai_Tham'), | |
| ('Tai_Viet', 'Tai_Viet'), | |
| ('Tai_Xuan_Jing', 'Tai_Xuan_Jing_Symbols'), | |
| ('Takri', 'Takri'), | |
| ('Tamil', 'Tamil'), | |
| ('Tangut', 'Tangut'), | |
| ('Tangut_Components', 'Tangut_Components'), | |
| ('Telugu', 'Telugu'), | |
| ('Thaana', 'Thaana'), | |
| ('Thai', 'Thai'), | |
| ('Tibetan', 'Tibetan'), | |
| ('Tifinagh', 'Tifinagh'), | |
| ('Tirhuta', 'Tirhuta'), | |
| ('Transport_And_Map', 'Transport_And_Map_Symbols'), | |
| ('UCAS', 'Unified_Canadian_Aboriginal_Syllabics', 'Canadian_Syllabics'), | |
| ('UCAS_Ext', 'Unified_Canadian_Aboriginal_Syllabics_Extended'), | |
| ('Ugaritic', 'Ugaritic'), | |
| ('Vai', 'Vai'), | |
| ('Vedic_Ext', 'Vedic_Extensions'), | |
| ('Vertical_Forms', 'Vertical_Forms'), | |
| ('VS', 'Variation_Selectors'), | |
| ('VS_Sup', 'Variation_Selectors_Supplement'), | |
| ('Warang_Citi', 'Warang_Citi'), | |
| ('Yi_Radicals', 'Yi_Radicals'), | |
| ('Yi_Syllables', 'Yi_Syllables'), | |
| ('Yijing', 'Yijing_Hexagram_Symbols'), | |
| ], catalog=True, min='9.0.0') | |
| prop.make_enum(g, ['sc', 'Script'], [ | |
| ('Adlm', 'Adlam'), | |
| ('Aghb', 'Caucasian_Albanian'), | |
| ('Ahom', 'Ahom'), | |
| ('Arab', 'Arabic'), | |
| ('Armi', 'Imperial_Aramaic'), | |
| ('Armn', 'Armenian'), | |
| ('Avst', 'Avestan'), | |
| ('Bali', 'Balinese'), | |
| ('Bamu', 'Bamum'), | |
| ('Bass', 'Bassa_Vah'), | |
| ('Batk', 'Batak'), | |
| ('Beng', 'Bengali'), | |
| ('Bhks', 'Bhaiksuki'), | |
| ('Bopo', 'Bopomofo'), | |
| ('Brah', 'Brahmi'), | |
| ('Brai', 'Braille'), | |
| ('Bugi', 'Buginese'), | |
| ('Buhd', 'Buhid'), | |
| ('Cakm', 'Chakma'), | |
| ('Cans', 'Canadian_Aboriginal'), | |
| ('Cari', 'Carian'), | |
| ('Cham', 'Cham'), | |
| ('Cher', 'Cherokee'), | |
| ('Copt', 'Coptic', 'Qaac'), | |
| ('Cprt', 'Cypriot'), | |
| ('Cyrl', 'Cyrillic'), | |
| ('Deva', 'Devanagari'), | |
| ('Dsrt', 'Deseret'), | |
| ('Dupl', 'Duployan'), | |
| ('Egyp', 'Egyptian_Hieroglyphs'), | |
| ('Elba', 'Elbasan'), | |
| ('Ethi', 'Ethiopic'), | |
| ('Geor', 'Georgian'), | |
| ('Glag', 'Glagolitic'), | |
| ('Goth', 'Gothic'), | |
| ('Gran', 'Grantha'), | |
| ('Grek', 'Greek'), | |
| ('Gujr', 'Gujarati'), | |
| ('Guru', 'Gurmukhi'), | |
| ('Hang', 'Hangul'), | |
| ('Hani', 'Han'), | |
| ('Hano', 'Hanunoo'), | |
| ('Hatr', 'Hatran'), | |
| ('Hebr', 'Hebrew'), | |
| ('Hira', 'Hiragana'), | |
| ('Hluw', 'Anatolian_Hieroglyphs'), | |
| ('Hmng', 'Pahawh_Hmong'), | |
| ('Hrkt', 'Katakana_Or_Hiragana'), | |
| ('Hung', 'Old_Hungarian'), | |
| ('Ital', 'Old_Italic'), | |
| ('Java', 'Javanese'), | |
| ('Kali', 'Kayah_Li'), | |
| ('Kana', 'Katakana'), | |
| ('Khar', 'Kharoshthi'), | |
| ('Khmr', 'Khmer'), | |
| ('Khoj', 'Khojki'), | |
| ('Knda', 'Kannada'), | |
| ('Kthi', 'Kaithi'), | |
| ('Lana', 'Tai_Tham'), | |
| ('Laoo', 'Lao'), | |
| ('Latn', 'Latin'), | |
| ('Lepc', 'Lepcha'), | |
| ('Limb', 'Limbu'), | |
| ('Lina', 'Linear_A'), | |
| ('Linb', 'Linear_B'), | |
| ('Lisu', 'Lisu'), | |
| ('Lyci', 'Lycian'), | |
| ('Lydi', 'Lydian'), | |
| ('Mahj', 'Mahajani'), | |
| ('Mand', 'Mandaic'), | |
| ('Mani', 'Manichaean'), | |
| ('Marc', 'Marchen'), | |
| ('Mend', 'Mende_Kikakui'), | |
| ('Merc', 'Meroitic_Cursive'), | |
| ('Mero', 'Meroitic_Hieroglyphs'), | |
| ('Mlym', 'Malayalam'), | |
| ('Modi', 'Modi'), | |
| ('Mong', 'Mongolian'), | |
| ('Mroo', 'Mro'), | |
| ('Mtei', 'Meetei_Mayek'), | |
| ('Mult', 'Multani'), | |
| ('Mymr', 'Myanmar'), | |
| ('Narb', 'Old_North_Arabian'), | |
| ('Nbat', 'Nabataean'), | |
| ('Newa', 'Newa'), | |
| ('Nkoo', 'Nko'), | |
| ('Ogam', 'Ogham'), | |
| ('Olck', 'Ol_Chiki'), | |
| ('Orkh', 'Old_Turkic'), | |
| ('Orya', 'Oriya'), | |
| ('Osge', 'Osage'), | |
| ('Osma', 'Osmanya'), | |
| ('Palm', 'Palmyrene'), | |
| ('Pauc', 'Pau_Cin_Hau'), | |
| ('Perm', 'Old_Permic'), | |
| ('Phag', 'Phags_Pa'), | |
| ('Phli', 'Inscriptional_Pahlavi'), | |
| ('Phlp', 'Psalter_Pahlavi'), | |
| ('Phnx', 'Phoenician'), | |
| ('Plrd', 'Miao'), | |
| ('Prti', 'Inscriptional_Parthian'), | |
| ('Rjng', 'Rejang'), | |
| ('Runr', 'Runic'), | |
| ('Samr', 'Samaritan'), | |
| ('Sarb', 'Old_South_Arabian'), | |
| ('Saur', 'Saurashtra'), | |
| ('Sgnw', 'SignWriting'), | |
| ('Shaw', 'Shavian'), | |
| ('Shrd', 'Sharada'), | |
| ('Sidd', 'Siddham'), | |
| ('Sind', 'Khudawadi'), | |
| ('Sinh', 'Sinhala'), | |
| ('Sora', 'Sora_Sompeng'), | |
| ('Sund', 'Sundanese'), | |
| ('Sylo', 'Syloti_Nagri'), | |
| ('Syrc', 'Syriac'), | |
| ('Tagb', 'Tagbanwa'), | |
| ('Takr', 'Takri'), | |
| ('Tale', 'Tai_Le'), | |
| ('Talu', 'New_Tai_Lue'), | |
| ('Taml', 'Tamil'), | |
| ('Tang', 'Tangut'), | |
| ('Tavt', 'Tai_Viet'), | |
| ('Telu', 'Telugu'), | |
| ('Tfng', 'Tifinagh'), | |
| ('Tglg', 'Tagalog'), | |
| ('Thaa', 'Thaana'), | |
| ('Thai', 'Thai'), | |
| ('Tibt', 'Tibetan'), | |
| ('Tirh', 'Tirhuta'), | |
| ('Ugar', 'Ugaritic'), | |
| ('Vaii', 'Vai'), | |
| ('Wara', 'Warang_Citi'), | |
| ('Xpeo', 'Old_Persian'), | |
| ('Xsux', 'Cuneiform'), | |
| ('Yiii', 'Yi'), | |
| ('Zinh', 'Inherited', 'Qaai'), | |
| ('Zyyy', 'Common'), | |
| ('Zzzz', 'Unknown'), | |
| ], catalog=True, min='9.0.0') | |
| # Moved to be *after* Script | |
| prop.make_set(g, ['scx', 'Script_Extensions'], Script, min='9.0.0') | |
| # Enumerated Properties | |
| prop.make_enum(g, ['bc', 'Bidi_Class'], [ | |
| ('AL', 'Arabic_Letter'), | |
| ('AN', 'Arabic_Number'), | |
| ('B', 'Paragraph_Separator'), | |
| ('BN', 'Boundary_Neutral'), | |
| ('CS', 'Common_Separator'), | |
| ('EN', 'European_Number'), | |
| ('ES', 'European_Separator'), | |
| ('ET', 'European_Terminator'), | |
| ('FSI', 'First_Strong_Isolate'), | |
| ('L', 'Left_To_Right'), | |
| ('LRE', 'Left_To_Right_Embedding'), | |
| ('LRI', 'Left_To_Right_Isolate'), | |
| ('LRO', 'Left_To_Right_Override'), | |
| ('NSM', 'Nonspacing_Mark'), | |
| ('ON', 'Other_Neutral'), | |
| ('PDF', 'Pop_Directional_Format'), | |
| ('PDI', 'Pop_Directional_Isolate'), | |
| ('R', 'Right_To_Left'), | |
| ('RLE', 'Right_To_Left_Embedding'), | |
| ('RLI', 'Right_To_Left_Isolate'), | |
| ('RLO', 'Right_To_Left_Override'), | |
| ('S', 'Segment_Separator'), | |
| ('WS', 'White_Space'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['bpt', 'Bidi_Paired_Bracket_Type'], [ | |
| ('c', 'Close'), | |
| ('n', 'None'), | |
| ('o', 'Open'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['ccc', 'Canonical_Combining_Class'], [ | |
| ('0', 'NR', 'Not_Reordered'), | |
| ('1', 'OV', 'Overlay'), | |
| ('7', 'NK', 'Nukta'), | |
| ('8', 'KV', 'Kana_Voicing'), | |
| ('9', 'VR', 'Virama'), | |
| ('10', 'CCC10', 'CCC10'), | |
| ('11', 'CCC11', 'CCC11'), | |
| ('12', 'CCC12', 'CCC12'), | |
| ('13', 'CCC13', 'CCC13'), | |
| ('14', 'CCC14', 'CCC14'), | |
| ('15', 'CCC15', 'CCC15'), | |
| ('16', 'CCC16', 'CCC16'), | |
| ('17', 'CCC17', 'CCC17'), | |
| ('18', 'CCC18', 'CCC18'), | |
| ('19', 'CCC19', 'CCC19'), | |
| ('20', 'CCC20', 'CCC20'), | |
| ('21', 'CCC21', 'CCC21'), | |
| ('22', 'CCC22', 'CCC22'), | |
| ('23', 'CCC23', 'CCC23'), | |
| ('24', 'CCC24', 'CCC24'), | |
| ('25', 'CCC25', 'CCC25'), | |
| ('26', 'CCC26', 'CCC26'), | |
| ('27', 'CCC27', 'CCC27'), | |
| ('28', 'CCC28', 'CCC28'), | |
| ('29', 'CCC29', 'CCC29'), | |
| ('30', 'CCC30', 'CCC30'), | |
| ('31', 'CCC31', 'CCC31'), | |
| ('32', 'CCC32', 'CCC32'), | |
| ('33', 'CCC33', 'CCC33'), | |
| ('34', 'CCC34', 'CCC34'), | |
| ('35', 'CCC35', 'CCC35'), | |
| ('36', 'CCC36', 'CCC36'), | |
| ('84', 'CCC84', 'CCC84'), | |
| ('91', 'CCC91', 'CCC91'), | |
| ('103', 'CCC103', 'CCC103'), | |
| ('107', 'CCC107', 'CCC107'), | |
| ('118', 'CCC118', 'CCC118'), | |
| ('122', 'CCC122', 'CCC122'), | |
| ('129', 'CCC129', 'CCC129'), | |
| ('130', 'CCC130', 'CCC130'), | |
| ('132', 'CCC132', 'CCC132'), | |
| ('133', 'CCC133', 'CCC133'), | |
| ('200', 'ATBL', 'Attached_Below_Left'), | |
| ('202', 'ATB', 'Attached_Below'), | |
| ('214', 'ATA', 'Attached_Above'), | |
| ('216', 'ATAR', 'Attached_Above_Right'), | |
| ('218', 'BL', 'Below_Left'), | |
| ('220', 'B', 'Below'), | |
| ('222', 'BR', 'Below_Right'), | |
| ('224', 'L', 'Left'), | |
| ('226', 'R', 'Right'), | |
| ('228', 'AL', 'Above_Left'), | |
| ('230', 'A', 'Above'), | |
| ('232', 'AR', 'Above_Right'), | |
| ('233', 'DB', 'Double_Below'), | |
| ('234', 'DA', 'Double_Above'), | |
| ('240', 'IS', 'Iota_Subscript'), | |
| ], manual_index=True, min='9.0.0') | |
| prop.make_enum(g, ['dt', 'Decomposition_Type'], [ | |
| ('Can', 'Canonical', 'can'), | |
| ('Com', 'Compat', 'com'), | |
| ('Enc', 'Circle', 'enc'), | |
| ('Fin', 'Final', 'fin'), | |
| ('Font', 'Font', 'font'), | |
| ('Fra', 'Fraction', 'fra'), | |
| ('Init', 'Initial', 'init'), | |
| ('Iso', 'Isolated', 'iso'), | |
| ('Med', 'Medial', 'med'), | |
| ('Nar', 'Narrow', 'nar'), | |
| ('Nb', 'Nobreak', 'nb'), | |
| ('None', 'None', 'none'), | |
| ('Sml', 'Small', 'sml'), | |
| ('Sqr', 'Square', 'sqr'), | |
| ('Sub', 'Sub', 'sub'), | |
| ('Sup', 'Super', 'sup'), | |
| ('Vert', 'Vertical', 'vert'), | |
| ('Wide', 'Wide', 'wide'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['ea', 'East_Asian_Width'], [ | |
| ('A', 'Ambiguous'), | |
| ('F', 'Fullwidth'), | |
| ('H', 'Halfwidth'), | |
| ('N', 'Neutral'), | |
| ('Na', 'Narrow'), | |
| ('W', 'Wide'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['gc', 'General_Category'], [ | |
| ('C', 'Other'), | |
| ('Cc', 'Control', 'cntrl'), | |
| ('Cf', 'Format'), | |
| ('Cn', 'Unassigned'), | |
| ('Co', 'Private_Use'), | |
| ('Cs', 'Surrogate'), | |
| ('L', 'Letter'), | |
| ('LC', 'Cased_Letter'), | |
| ('Ll', 'Lowercase_Letter'), | |
| ('Lm', 'Modifier_Letter'), | |
| ('Lo', 'Other_Letter'), | |
| ('Lt', 'Titlecase_Letter'), | |
| ('Lu', 'Uppercase_Letter'), | |
| ('M', 'Mark', 'Combining_Mark'), | |
| ('Mc', 'Spacing_Mark'), | |
| ('Me', 'Enclosing_Mark'), | |
| ('Mn', 'Nonspacing_Mark'), | |
| ('N', 'Number'), | |
| ('Nd', 'Decimal_Number', 'digit'), | |
| ('Nl', 'Letter_Number'), | |
| ('No', 'Other_Number'), | |
| ('P', 'Punctuation', 'punct'), | |
| ('Pc', 'Connector_Punctuation'), | |
| ('Pd', 'Dash_Punctuation'), | |
| ('Pe', 'Close_Punctuation'), | |
| ('Pf', 'Final_Punctuation'), | |
| ('Pi', 'Initial_Punctuation'), | |
| ('Po', 'Other_Punctuation'), | |
| ('Ps', 'Open_Punctuation'), | |
| ('S', 'Symbol'), | |
| ('Sc', 'Currency_Symbol'), | |
| ('Sk', 'Modifier_Symbol'), | |
| ('Sm', 'Math_Symbol'), | |
| ('So', 'Other_Symbol'), | |
| ('Z', 'Separator'), | |
| ('Zl', 'Line_Separator'), | |
| ('Zp', 'Paragraph_Separator'), | |
| ('Zs', 'Space_Separator'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['GCB', 'Grapheme_Cluster_Break'], [ | |
| ('CN', 'Control'), | |
| ('CR', 'CR'), | |
| ('EB', 'E_Base'), | |
| ('EBG', 'E_Base_GAZ'), | |
| ('EM', 'E_Modifier'), | |
| ('EX', 'Extend'), | |
| ('GAZ', 'Glue_After_Zwj'), | |
| ('L', 'L'), | |
| ('LF', 'LF'), | |
| ('LV', 'LV'), | |
| ('LVT', 'LVT'), | |
| ('PP', 'Prepend'), | |
| ('RI', 'Regional_Indicator'), | |
| ('SM', 'SpacingMark'), | |
| ('T', 'T'), | |
| ('V', 'V'), | |
| ('XX', 'Other'), | |
| ('ZWJ', 'ZWJ'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['hst', 'Hangul_Syllable_Type'], [ | |
| ('L', 'Leading_Jamo'), | |
| ('LV', 'LV_Syllable'), | |
| ('LVT', 'LVT_Syllable'), | |
| ('NA', 'Not_Applicable'), | |
| ('T', 'Trailing_Jamo'), | |
| ('V', 'Vowel_Jamo'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['InPC', 'Indic_Positional_Category'], [ | |
| ('Bottom', 'Bottom'), | |
| ('Bottom_And_Right', 'Bottom_And_Right'), | |
| ('Left', 'Left'), | |
| ('Left_And_Right', 'Left_And_Right'), | |
| ('NA', 'NA'), | |
| ('Overstruck', 'Overstruck'), | |
| ('Right', 'Right'), | |
| ('Top', 'Top'), | |
| ('Top_And_Bottom', 'Top_And_Bottom'), | |
| ('Top_And_Bottom_And_Right', 'Top_And_Bottom_And_Right'), | |
| ('Top_And_Left', 'Top_And_Left'), | |
| ('Top_And_Left_And_Right', 'Top_And_Left_And_Right'), | |
| ('Top_And_Right', 'Top_And_Right'), | |
| ('Visual_Order_Left', 'Visual_Order_Left'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['InSC', 'Indic_Syllabic_Category'], [ | |
| ('Avagraha', 'Avagraha'), | |
| ('Bindu', 'Bindu'), | |
| ('Brahmi_Joining_Number', 'Brahmi_Joining_Number'), | |
| ('Cantillation_Mark', 'Cantillation_Mark'), | |
| ('Consonant', 'Consonant'), | |
| ('Consonant_Dead', 'Consonant_Dead'), | |
| ('Consonant_Final', 'Consonant_Final'), | |
| ('Consonant_Head_Letter', 'Consonant_Head_Letter'), | |
| ('Consonant_Killer', 'Consonant_Killer'), | |
| ('Consonant_Medial', 'Consonant_Medial'), | |
| ('Consonant_Placeholder', 'Consonant_Placeholder'), | |
| ('Consonant_Preceding_Repha', 'Consonant_Preceding_Repha'), | |
| ('Consonant_Prefixed', 'Consonant_Prefixed'), | |
| ('Consonant_Subjoined', 'Consonant_Subjoined'), | |
| ('Consonant_Succeeding_Repha', 'Consonant_Succeeding_Repha'), | |
| ('Consonant_With_Stacker', 'Consonant_With_Stacker'), | |
| ('Gemination_Mark', 'Gemination_Mark'), | |
| ('Invisible_Stacker', 'Invisible_Stacker'), | |
| ('Joiner', 'Joiner'), | |
| ('Modifying_Letter', 'Modifying_Letter'), | |
| ('Non_Joiner', 'Non_Joiner'), | |
| ('Nukta', 'Nukta'), | |
| ('Number', 'Number'), | |
| ('Number_Joiner', 'Number_Joiner'), | |
| ('Other', 'Other'), | |
| ('Pure_Killer', 'Pure_Killer'), | |
| ('Register_Shifter', 'Register_Shifter'), | |
| ('Syllable_Modifier', 'Syllable_Modifier'), | |
| ('Tone_Letter', 'Tone_Letter'), | |
| ('Tone_Mark', 'Tone_Mark'), | |
| ('Virama', 'Virama'), | |
| ('Visarga', 'Visarga'), | |
| ('Vowel', 'Vowel'), | |
| ('Vowel_Dependent', 'Vowel_Dependent'), | |
| ('Vowel_Independent', 'Vowel_Independent'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['jg', 'Joining_Group'], [ | |
| ('African_Feh', 'African_Feh'), | |
| ('African_Noon', 'African_Noon'), | |
| ('African_Qaf', 'African_Qaf'), | |
| ('Ain', 'Ain'), | |
| ('Alaph', 'Alaph'), | |
| ('Alef', 'Alef'), | |
| ('Beh', 'Beh'), | |
| ('Beth', 'Beth'), | |
| ('Burushaski_Yeh_Barree', 'Burushaski_Yeh_Barree'), | |
| ('Dal', 'Dal'), | |
| ('Dalath_Rish', 'Dalath_Rish'), | |
| ('E', 'E'), | |
| ('Farsi_Yeh', 'Farsi_Yeh'), | |
| ('Fe', 'Fe'), | |
| ('Feh', 'Feh'), | |
| ('Final_Semkath', 'Final_Semkath'), | |
| ('Gaf', 'Gaf'), | |
| ('Gamal', 'Gamal'), | |
| ('Hah', 'Hah'), | |
| ('He', 'He'), | |
| ('Heh', 'Heh'), | |
| ('Heh_Goal', 'Heh_Goal'), | |
| ('Heth', 'Heth'), | |
| ('Kaf', 'Kaf'), | |
| ('Kaph', 'Kaph'), | |
| ('Khaph', 'Khaph'), | |
| ('Knotted_Heh', 'Knotted_Heh'), | |
| ('Lam', 'Lam'), | |
| ('Lamadh', 'Lamadh'), | |
| ('Manichaean_Aleph', 'Manichaean_Aleph'), | |
| ('Manichaean_Ayin', 'Manichaean_Ayin'), | |
| ('Manichaean_Beth', 'Manichaean_Beth'), | |
| ('Manichaean_Daleth', 'Manichaean_Daleth'), | |
| ('Manichaean_Dhamedh', 'Manichaean_Dhamedh'), | |
| ('Manichaean_Five', 'Manichaean_Five'), | |
| ('Manichaean_Gimel', 'Manichaean_Gimel'), | |
| ('Manichaean_Heth', 'Manichaean_Heth'), | |
| ('Manichaean_Hundred', 'Manichaean_Hundred'), | |
| ('Manichaean_Kaph', 'Manichaean_Kaph'), | |
| ('Manichaean_Lamedh', 'Manichaean_Lamedh'), | |
| ('Manichaean_Mem', 'Manichaean_Mem'), | |
| ('Manichaean_Nun', 'Manichaean_Nun'), | |
| ('Manichaean_One', 'Manichaean_One'), | |
| ('Manichaean_Pe', 'Manichaean_Pe'), | |
| ('Manichaean_Qoph', 'Manichaean_Qoph'), | |
| ('Manichaean_Resh', 'Manichaean_Resh'), | |
| ('Manichaean_Sadhe', 'Manichaean_Sadhe'), | |
| ('Manichaean_Samekh', 'Manichaean_Samekh'), | |
| ('Manichaean_Taw', 'Manichaean_Taw'), | |
| ('Manichaean_Ten', 'Manichaean_Ten'), | |
| ('Manichaean_Teth', 'Manichaean_Teth'), | |
| ('Manichaean_Thamedh', 'Manichaean_Thamedh'), | |
| ('Manichaean_Twenty', 'Manichaean_Twenty'), | |
| ('Manichaean_Waw', 'Manichaean_Waw'), | |
| ('Manichaean_Yodh', 'Manichaean_Yodh'), | |
| ('Manichaean_Zayin', 'Manichaean_Zayin'), | |
| ('Meem', 'Meem'), | |
| ('Mim', 'Mim'), | |
| ('No_Joining_Group', 'No_Joining_Group'), | |
| ('Noon', 'Noon'), | |
| ('Nun', 'Nun'), | |
| ('Nya', 'Nya'), | |
| ('Pe', 'Pe'), | |
| ('Qaf', 'Qaf'), | |
| ('Qaph', 'Qaph'), | |
| ('Reh', 'Reh'), | |
| ('Reversed_Pe', 'Reversed_Pe'), | |
| ('Rohingya_Yeh', 'Rohingya_Yeh'), | |
| ('Sad', 'Sad'), | |
| ('Sadhe', 'Sadhe'), | |
| ('Seen', 'Seen'), | |
| ('Semkath', 'Semkath'), | |
| ('Shin', 'Shin'), | |
| ('Straight_Waw', 'Straight_Waw'), | |
| ('Swash_Kaf', 'Swash_Kaf'), | |
| ('Syriac_Waw', 'Syriac_Waw'), | |
| ('Tah', 'Tah'), | |
| ('Taw', 'Taw'), | |
| ('Teh_Marbuta', 'Teh_Marbuta'), | |
| ('Teh_Marbuta_Goal', 'Hamza_On_Heh_Goal'), | |
| ('Teth', 'Teth'), | |
| ('Waw', 'Waw'), | |
| ('Yeh', 'Yeh'), | |
| ('Yeh_Barree', 'Yeh_Barree'), | |
| ('Yeh_With_Tail', 'Yeh_With_Tail'), | |
| ('Yudh', 'Yudh'), | |
| ('Yudh_He', 'Yudh_He'), | |
| ('Zain', 'Zain'), | |
| ('Zhain', 'Zhain'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['jt', 'Joining_Type'], [ | |
| ('C', 'Join_Causing'), | |
| ('D', 'Dual_Joining'), | |
| ('L', 'Left_Joining'), | |
| ('R', 'Right_Joining'), | |
| ('T', 'Transparent'), | |
| ('U', 'Non_Joining'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['lb', 'Line_Break'], [ | |
| ('AI', 'Ambiguous'), | |
| ('AL', 'Alphabetic'), | |
| ('B2', 'Break_Both'), | |
| ('BA', 'Break_After'), | |
| ('BB', 'Break_Before'), | |
| ('BK', 'Mandatory_Break'), | |
| ('CB', 'Contingent_Break'), | |
| ('CJ', 'Conditional_Japanese_Starter'), | |
| ('CL', 'Close_Punctuation'), | |
| ('CM', 'Combining_Mark'), | |
| ('CP', 'Close_Parenthesis'), | |
| ('CR', 'Carriage_Return'), | |
| ('EB', 'E_Base'), | |
| ('EM', 'E_Modifier'), | |
| ('EX', 'Exclamation'), | |
| ('GL', 'Glue'), | |
| ('H2', 'H2'), | |
| ('H3', 'H3'), | |
| ('HL', 'Hebrew_Letter'), | |
| ('HY', 'Hyphen'), | |
| ('ID', 'Ideographic'), | |
| ('IN', 'Inseparable', 'Inseperable'), | |
| ('IS', 'Infix_Numeric'), | |
| ('JL', 'JL'), | |
| ('JT', 'JT'), | |
| ('JV', 'JV'), | |
| ('LF', 'Line_Feed'), | |
| ('NL', 'Next_Line'), | |
| ('NS', 'Nonstarter'), | |
| ('NU', 'Numeric'), | |
| ('OP', 'Open_Punctuation'), | |
| ('PO', 'Postfix_Numeric'), | |
| ('PR', 'Prefix_Numeric'), | |
| ('QU', 'Quotation'), | |
| ('RI', 'Regional_Indicator'), | |
| ('SA', 'Complex_Context'), | |
| ('SG', 'Surrogate'), | |
| ('SP', 'Space'), | |
| ('SY', 'Break_Symbols'), | |
| ('WJ', 'Word_Joiner'), | |
| ('XX', 'Unknown'), | |
| ('ZW', 'ZWSpace'), | |
| ('ZWJ', 'ZWJ'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['NFC_QC', 'NFC_Quick_Check'], [ | |
| ('M', 'Maybe'), | |
| ('N', 'No'), | |
| ('Y', 'Yes'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['NFD_QC', 'NFD_Quick_Check'], [ | |
| ('N', 'No'), | |
| ('Y', 'Yes'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['NFKC_QC', 'NFKC_Quick_Check'], [ | |
| ('M', 'Maybe'), | |
| ('N', 'No'), | |
| ('Y', 'Yes'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['NFKD_QC', 'NFKD_Quick_Check'], [ | |
| ('N', 'No'), | |
| ('Y', 'Yes'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['nt', 'Numeric_Type'], [ | |
| ('De', 'Decimal'), | |
| ('Di', 'Digit'), | |
| ('None', 'None'), | |
| ('Nu', 'Numeric'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['SB', 'Sentence_Break'], [ | |
| ('AT', 'ATerm'), | |
| ('CL', 'Close'), | |
| ('CR', 'CR'), | |
| ('EX', 'Extend'), | |
| ('FO', 'Format'), | |
| ('LE', 'OLetter'), | |
| ('LF', 'LF'), | |
| ('LO', 'Lower'), | |
| ('NU', 'Numeric'), | |
| ('SC', 'SContinue'), | |
| ('SE', 'Sep'), | |
| ('SP', 'Sp'), | |
| ('ST', 'STerm'), | |
| ('UP', 'Upper'), | |
| ('XX', 'Other'), | |
| ], min='9.0.0') | |
| prop.make_enum(g, ['WB', 'Word_Break'], [ | |
| ('CR', 'CR'), | |
| ('DQ', 'Double_Quote'), | |
| ('EB', 'E_Base'), | |
| ('EBG', 'E_Base_GAZ'), | |
| ('EM', 'E_Modifier'), | |
| ('EX', 'ExtendNumLet'), | |
| ('Extend', 'Extend'), | |
| ('FO', 'Format'), | |
| ('GAZ', 'Glue_After_Zwj'), | |
| ('HL', 'Hebrew_Letter'), | |
| ('KA', 'Katakana'), | |
| ('LE', 'ALetter'), | |
| ('LF', 'LF'), | |
| ('MB', 'MidNumLet'), | |
| ('ML', 'MidLetter'), | |
| ('MN', 'MidNum'), | |
| ('NL', 'Newline'), | |
| ('NU', 'Numeric'), | |
| ('RI', 'Regional_Indicator'), | |
| ('SQ', 'Single_Quote'), | |
| ('XX', 'Other'), | |
| ('ZWJ', 'ZWJ'), | |
| ], min='9.0.0') | |
| # Binary Properties | |
| prop.make_bool(g, ['AHex', 'ASCII_Hex_Digit'], min='9.0.0') | |
| prop.make_bool(g, ['Alpha', 'Alphabetic'], min='9.0.0') | |
| prop.make_bool(g, ['Bidi_C', 'Bidi_Control'], min='9.0.0') | |
| prop.make_bool(g, ['Bidi_M', 'Bidi_Mirrored'], min='9.0.0') | |
| prop.make_bool(g, ['Cased', 'Cased'], min='9.0.0') | |
| prop.make_bool(g, ['CE', 'Composition_Exclusion'], min='9.0.0') | |
| prop.make_bool(g, ['CI', 'Case_Ignorable'], min='9.0.0') | |
| prop.make_bool(g, ['Comp_Ex', 'Full_Composition_Exclusion'], min='9.0.0') | |
| prop.make_bool(g, ['CWCF', 'Changes_When_Casefolded'], min='9.0.0') | |
| prop.make_bool(g, ['CWCM', 'Changes_When_Casemapped'], min='9.0.0') | |
| prop.make_bool(g, ['CWKCF', 'Changes_When_NFKC_Casefolded'], min='9.0.0') | |
| prop.make_bool(g, ['CWL', 'Changes_When_Lowercased'], min='9.0.0') | |
| prop.make_bool(g, ['CWT', 'Changes_When_Titlecased'], min='9.0.0') | |
| prop.make_bool(g, ['CWU', 'Changes_When_Uppercased'], min='9.0.0') | |
| prop.make_bool(g, ['Dash', 'Dash'], min='9.0.0') | |
| prop.make_bool(g, ['Dep', 'Deprecated'], min='9.0.0') | |
| prop.make_bool(g, ['DI', 'Default_Ignorable_Code_Point'], min='9.0.0') | |
| prop.make_bool(g, ['Dia', 'Diacritic'], min='9.0.0') | |
| prop.make_bool(g, ['Ext', 'Extender'], min='9.0.0') | |
| prop.make_bool(g, ['Gr_Base', 'Grapheme_Base'], min='9.0.0') | |
| prop.make_bool(g, ['Gr_Ext', 'Grapheme_Extend'], min='9.0.0') | |
| prop.make_bool(g, ['Gr_Link', 'Grapheme_Link'], min='9.0.0') | |
| prop.make_bool(g, ['Hex', 'Hex_Digit'], min='9.0.0') | |
| prop.make_bool(g, ['Hyphen', 'Hyphen'], min='9.0.0') | |
| prop.make_bool(g, ['IDC', 'ID_Continue'], min='9.0.0') | |
| prop.make_bool(g, ['Ideo', 'Ideographic'], min='9.0.0') | |
| prop.make_bool(g, ['IDS', 'ID_Start'], min='9.0.0') | |
| prop.make_bool(g, ['IDSB', 'IDS_Binary_Operator'], min='9.0.0') | |
| prop.make_bool(g, ['IDST', 'IDS_Trinary_Operator'], min='9.0.0') | |
| prop.make_bool(g, ['Join_C', 'Join_Control'], min='9.0.0') | |
| prop.make_bool(g, ['LOE', 'Logical_Order_Exception'], min='9.0.0') | |
| prop.make_bool(g, ['Lower', 'Lowercase'], min='9.0.0') | |
| prop.make_bool(g, ['Math', 'Math'], min='9.0.0') | |
| prop.make_bool(g, ['NChar', 'Noncharacter_Code_Point'], min='9.0.0') | |
| prop.make_bool(g, ['OAlpha', 'Other_Alphabetic'], min='9.0.0') | |
| prop.make_bool(g, ['ODI', 'Other_Default_Ignorable_Code_Point'], min='9.0.0') | |
| prop.make_bool(g, ['OGr_Ext', 'Other_Grapheme_Extend'], min='9.0.0') | |
| prop.make_bool(g, ['OIDC', 'Other_ID_Continue'], min='9.0.0') | |
| prop.make_bool(g, ['OIDS', 'Other_ID_Start'], min='9.0.0') | |
| prop.make_bool(g, ['OLower', 'Other_Lowercase'], min='9.0.0') | |
| prop.make_bool(g, ['OMath', 'Other_Math'], min='9.0.0') | |
| prop.make_bool(g, ['OUpper', 'Other_Uppercase'], min='9.0.0') | |
| prop.make_bool(g, ['Pat_Syn', 'Pattern_Syntax'], min='9.0.0') | |
| prop.make_bool(g, ['Pat_WS', 'Pattern_White_Space'], min='9.0.0') | |
| prop.make_bool(g, ['PCM', 'Prepended_Concatenation_Mark'], min='9.0.0') | |
| prop.make_bool(g, ['QMark', 'Quotation_Mark'], min='9.0.0') | |
| prop.make_bool(g, ['Radical', 'Radical'], min='9.0.0') | |
| prop.make_bool(g, ['SD', 'Soft_Dotted'], min='9.0.0') | |
| prop.make_bool(g, ['STerm', 'Sentence_Terminal'], min='9.0.0') | |
| prop.make_bool(g, ['Term', 'Terminal_Punctuation'], min='9.0.0') | |
| prop.make_bool(g, ['UIdeo', 'Unified_Ideograph'], min='9.0.0') | |
| prop.make_bool(g, ['Upper', 'Uppercase'], min='9.0.0') | |
| prop.make_bool(g, ['VS', 'Variation_Selector'], min='9.0.0') | |
| prop.make_bool(g, ['WSpace', 'White_Space', 'space'], min='9.0.0') | |
| prop.make_bool(g, ['XIDC', 'XID_Continue'], min='9.0.0') | |
| prop.make_bool(g, ['XIDS', 'XID_Start'], min='9.0.0') | |
| prop.make_bool(g, ['XO_NFC', 'Expands_On_NFC'], min='9.0.0') | |
| prop.make_bool(g, ['XO_NFD', 'Expands_On_NFD'], min='9.0.0') | |
| prop.make_bool(g, ['XO_NFKC', 'Expands_On_NFKC'], min='9.0.0') | |
| prop.make_bool(g, ['XO_NFKD', 'Expands_On_NFKD'], min='9.0.0') | |
| _init_classes() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ''' Fixed classes used to support uni.alias and its dynamic classes. | |
| ''' | |
| import decimal, fractions | |
| from functools import partial | |
| from . import _util | |
| def _split(s): | |
| rv = s.split() | |
| assert set(rv) == set(s.split(' ')) - {''} | |
| return rv | |
| class _Property: | |
| ''' Base of all exposed property classes. | |
| By default, this class is not instantiable. | |
| ''' | |
| def __new__(cls, *args, **kwargs): | |
| if not issubclass(cls, _InstantiableProperty): | |
| raise NotImplementedError('most property classes are not instantiable') | |
| return object.__new__(cls) | |
| def __init__(self): | |
| super().__init__() | |
| @classmethod | |
| def check(cls, val): | |
| raise NotImplementedError('Needs to be implemented in subclasses!') | |
| @classmethod | |
| def convert(cls, val): | |
| raise NotImplementedError('Needs to be implemented in subclasses!') | |
| # TODO actually optimize using bitsets when possible. | |
class Set(_Property):
    ''' Property whose value is a set of zero or more values of another
        property.

        Concrete subclasses are generated on the fly by make_set, which
        supplies the ``element`` class attribute.
    '''

    @classmethod
    def check(cls, val):
        assert issubclass(cls.element, _Property)
        assert isinstance(val, set)
        # NOTE(review): no ``filter`` classmethod is defined on the property
        # classes in this module -- confirm where the element class gets it.
        assert all(member is cls.element.filter(member) for member in val)

    @classmethod
    def convert(cls, val):
        assert issubclass(cls.element, _Property)
        assert isinstance(val, str)
        members = [cls.element.convert(token) for token in _split(val)]
        unique = set(members)
        # A repeated element in the input would make the set smaller.
        assert len(members) == len(unique)
        return unique
class _EnumLikeProperty(_Property):
    ''' Base for properties that take one of a fixed collection of values.

        Subclasses provide ``value_set`` and ``value_by_name``.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, cls)
        assert val in cls.value_set

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        # An unknown name surfaces as KeyError from the lookup.
        return cls.value_by_name[val]
class _InstantiableProperty(_EnumLikeProperty):
    ''' Base for properties whose values are instances of the property class.

        Concrete subclasses are generated on the fly by make_enum.
    '''

    def __init__(self, index, aliases, *, manual_index):
        # When manual_index is true the names start one slot later in
        # aliases.  ``index`` itself is not stored by this base class.
        first_name = manual_index + 0
        self.value_aliases = aliases
        self.short_value_name = aliases[first_name]
        self.long_value_name = aliases[first_name + 1]

    def __repr__(self):
        return '{!s}.{!s}'.format(self.__class__.__name__, self.long_value_name)
class Enum(_InstantiableProperty):
    ''' Property whose set of values rarely grows in later versions.

        Because of that, its values can meaningfully be used with bitsets.
    '''

    def __init__(self, index, aliases, *, manual_index):
        super().__init__(index, aliases, manual_index=manual_index)
        # With manual_index the first alias carries the numeric index and
        # overrides the positional one passed in.
        self.index = int(aliases[0]) if manual_index else index
class Catalog(_InstantiableProperty):
    ''' Property whose set of values commonly grows in later versions.

        Because of that, its values *cannot* meaningfully be used with bitsets.
    '''
class Bool(_EnumLikeProperty):
    ''' Property whose values are true/false.

        Values are the builtin ``False``/``True`` objects, not instances of
        this class.
    '''
    # Every accepted spelling mapped to its boolean value.
    value_by_name = {
        k: v
        for keys, v in [
            (('N', 'No', 'F', 'False'), False),
            (('Y', 'Yes', 'T', 'True'), True),
        ]
        for k in keys
    }
    value_set = set(value_by_name.values())
    value_list = [False, True]

    @classmethod
    def check(cls, val):
        ''' Assert that val is one of the builtin booleans.

            The inherited check requires isinstance(val, cls), which can
            never hold for a builtin bool, so it is overridden here.
        '''
        # The isinstance test also keeps out 0 and 1, which compare equal
        # to False and True and would pass the membership test alone.
        assert isinstance(val, bool)
        assert val in cls.value_set
class Int(_Property):
    ''' Property whose value is an integer written in base 10.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        number = int(val)
        return number
class Rational(_Property):
    ''' Property whose value is a ratio of integers, written with a slash.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, fractions.Fraction)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        ratio = fractions.Fraction(val)
        return ratio
class Decimal(_Property):
    ''' Property whose value is a ratio of integers, written in decimal
        notation.

        May be inexact.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, decimal.Decimal)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        number = decimal.Decimal(val)
        return number
class RawString(_Property):
    ''' Property whose value is a string taken exactly as written.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, str)

    @classmethod
    def convert(cls, val):
        # No decoding is applied; the text is handed back untouched.
        assert isinstance(val, str)
        return val
class Codepoint(_Property):
    ''' Property whose value is a single codepoint, written in hexadecimal.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        # Only the length (4 to 6 characters) is asserted here; int() raises
        # ValueError for text it cannot parse as base 16.
        assert len(val) in range(4, 7)
        return int(val, base=16)
class CodepointRange(_Property):
    ''' Property whose value is a (low, high) pair of codepoints.

        The text form is either ``LOW..HIGH`` or a single codepoint, which
        becomes a pair with both ends equal.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, tuple)
        assert len(val) == 2
        assert all(isinstance(val[end], int) for end in (0, 1))

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        if '..' not in val:
            low = high = val
        else:
            low, high = val.split('..')
            # A one-codepoint range is expected to use the short form.
            assert low != high
        return Codepoint.convert(low), Codepoint.convert(high)
class CodepointSequence(_Property):
    ''' Property whose values are strings, encoded as a sequence of codepoints.

        The text form is a space-separated list of hexadecimal codepoints;
        an empty input converts to the empty string.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, str)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        # _split yields hex strings, while chr() needs the integer value,
        # so each token is parsed the same way as a single Codepoint.
        return ''.join(chr(Codepoint.convert(cp)) for cp in _split(val))
class U_Codepoint(_Property):
    ''' Property whose values are single codepoints, encoded with U+ out front.
    '''

    @classmethod
    def check(cls, val):
        assert isinstance(val, int)

    @classmethod
    def convert(cls, val):
        assert isinstance(val, str)
        # Test the value itself; calling the unbound str.startswith with
        # only the prefix raises TypeError.
        assert val.startswith('U+')
        # The remainder after the two-character prefix is a plain codepoint.
        return Codepoint.convert(val[2:])
def unique_aliases(aliases):
    ''' Return the aliases as a list with exact repeats dropped.

        The first occurrence of each spelling is kept and order is
        preserved.  Aliases that differ only by case are all kept.
        Assertions restrict where repeats may occur.
    '''
    names = list(aliases)
    folded = [name.casefold() for name in names]
    all_fold_equal = len(set(folded)) == 1
    exact_seen = set()
    fold_seen = set()
    dropped = set()
    for pos, (name, low) in enumerate(zip(names, folded)):
        if name in exact_seen:
            # An exact repeat is only allowed as the second entry when all
            # aliases fold to the same text, or as the third entry of a
            # three-alias list whose repeated name starts with 'CCC'.
            echo_of_first = pos == 1 and all_fold_equal
            ccc_triple = pos == 2 and len(names) == 3 and name.startswith('CCC')
            assert echo_of_first or ccc_triple, name
            dropped.add(pos)
        elif low in fold_seen:
            # A case-only variant must match one of the first two aliases.
            assert low in folded[:2], low
        exact_seen.add(name)
        fold_seen.add(low)
    return [name for pos, name in enumerate(names) if pos not in dropped]
def _make_aliases(g, aliases, cls):
    ''' Bind every distinct alias in mapping g to the object cls.

        Asserts that no alias is already present in g.
    '''
    for name in unique_aliases(aliases):
        # Entries bound before a failing assertion stay in place.
        assert name not in g, name
        g[name] = cls
def _make_class(g, aliases, cls, *, doc=None, min, max='9999.0.0'):
    ''' Finish a freshly created property class and publish it in g.

        g is a module namespace (it must contain '__name__' and '__all__').
        aliases[0] is the short name and aliases[1] the long name; the class
        is renamed to the long name and bound in g under every distinct alias.
    '''
    # TODO make doc mandatory
    short, long_ = aliases[0], aliases[1]

    # Metadata recorded on the class itself.
    cls.__module__ = g['__name__']
    cls.__doc__ = doc
    cls.min_unicode_version = min
    cls.max_unicode_version = max
    cls.aliases = aliases
    cls.short_name = short
    cls.long_name = long_
    cls.__qualname__ = cls.__name__ = cls.long_name

    # Publish under each alias, and export the long name.
    _make_aliases(g, aliases, cls)
    g['__all__'].append(cls.__name__)
def _make_subclass(g, aliases, cls, **kwargs):
    ''' Create an empty subclass of cls and register it via _make_class.

        Keyword arguments are forwarded to _make_class unchanged.
    '''
    class C(cls):
        # No members of its own; _make_class renames it and fills in metadata.
        pass

    _make_class(g, aliases, C, **kwargs)
def make_set(g, aliases, elem, **kwargs):
    ''' Create and register a Set subclass whose members are values of elem.

        Keyword arguments are forwarded to _make_class unchanged.
    '''
    class S(Set):
        # The property class that each member of the set belongs to.
        element = elem

    _make_class(g, aliases, S, **kwargs)
def make_enum(g, aliases, values, *, catalog=False, manual_index=False, **kwargs):
    ''' Create and register an Enum (or Catalog) subclass with its values.

        values is a sequence of alias tuples, one per property value.  Each
        value is instantiated, appended to value_list, and bound under every
        distinct alias both in value_by_name and as a class attribute.
        Remaining keyword arguments are forwarded to _make_class.
    '''
    base = Catalog if catalog else Enum

    class E(base):
        value_by_name = {}
        value_set = set()
        value_list = []

    _make_class(g, aliases, E, **kwargs)

    # Mapping view of the class, so aliases can be bound as attributes.
    as_attributes = _util.ClassDict(E)
    for position, value_aliases in enumerate(values):
        member = E(position, value_aliases, manual_index=manual_index)
        E.value_list.append(member)
        _make_aliases(E.value_by_name, value_aliases, member)
        _make_aliases(as_attributes, value_aliases, member)

    # Rebuilt once at the end from the fully populated name table.
    E.value_set = set(E.value_by_name.values())
# Factories for the property kinds that need no per-class data: each one
# is _make_subclass with the base class already filled in.
make_bool = partial(_make_subclass, cls=Bool)

# Numeric kinds.
make_int = partial(_make_subclass, cls=Int)
make_rational = partial(_make_subclass, cls=Rational)
make_decimal = partial(_make_subclass, cls=Decimal)

# String and codepoint kinds.
make_raw = partial(_make_subclass, cls=RawString)
make_code = partial(_make_subclass, cls=Codepoint)
make_code_range = partial(_make_subclass, cls=CodepointRange)
make_code_seq = partial(_make_subclass, cls=CodepointSequence)
make_u_code = partial(_make_subclass, cls=U_Codepoint)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment