Created
February 24, 2024 20:22
-
-
Save ehzawad/5603e77ed1af52171a2ae527695be602 to your computer and use it in GitHub Desktop.
UTF-8 print all Bengali Characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import yaml | |
# Inclusive (start, end) code point ranges covering the assigned characters of
# the Unicode Bengali block (U+0980-U+09FF). Nothing is filtered at runtime:
# the gaps between consecutive ranges are the block's unassigned code points,
# which are simply left out of this table.
bengali_char_ranges = [
    (0x0980, 0x0983),  # ANJI, CANDRABINDU, ANUSVARA, VISARGA
    (0x0985, 0x098C),  # independent vowels A .. VOCALIC L
    (0x098F, 0x0990),  # letters E, AI
    (0x0993, 0x09A8),  # letters O, AU, then consonants KA .. NA
    (0x09AA, 0x09B0),  # consonants PA .. RA
    (0x09B2, 0x09B2),  # letter LA
    (0x09B6, 0x09B9),  # letters SHA .. HA
    (0x09BC, 0x09C4),  # NUKTA, AVAGRAHA, vowel signs AA .. VOCALIC RR
    (0x09C7, 0x09C8),  # vowel signs E, AI
    (0x09CB, 0x09CE),  # vowel signs O, AU, VIRAMA, letter KHANDA TA
    (0x09D7, 0x09D7),  # AU length mark
    (0x09DC, 0x09DD),  # letters RRA, RHA
    (0x09DF, 0x09E3),  # letter YYA, vocalic RR/LL letters and vowel signs
    (0x09E6, 0x09FE),  # digits, currency marks and other signs
]
# Top-level mapping that is serialized to YAML; the "characters" list starts
# empty and is filled in place further down the script.
bengali_chars = {"characters": []}
def populate_characters(ranges, char_dict):
    """Append every character of the given code point ranges to a dict.

    Args:
        ranges: Iterable of ``(start, end)`` integer code point pairs. Both
            ends are inclusive.
        char_dict: Dict whose ``"characters"`` list receives the characters,
            in the order the ranges are given. The list is created when the
            key is missing; an existing list keeps its current contents.

    Returns:
        None. ``char_dict`` is modified in place.
    """
    # setdefault avoids a KeyError for a dict that has no "characters" entry
    # yet, while leaving an already-present list untouched.
    characters = char_dict.setdefault("characters", [])
    for start, end in ranges:
        # end + 1 because range() excludes its stop value but the table's
        # upper bounds are inclusive.
        characters.extend(map(chr, range(start, end + 1)))
# Fill bengali_chars["characters"] from the range table defined above.
populate_characters(bengali_char_ranges, bengali_chars)

# Write the mapping out as YAML. allow_unicode=True makes PyYAML emit the
# Bengali characters literally instead of as \u escape sequences, so the file
# is opened with an explicit UTF-8 encoding.
yaml_file_path = "bengali_chars.yaml"
with open(yaml_file_path, mode="w", encoding="utf-8") as f:
    yaml.dump(bengali_chars, stream=f, allow_unicode=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment