Last active
September 11, 2024 05:51
-
-
Save SoursopID/b478dc27f3e69b93732a5e8fc8475ae7 to your computer and use it in GitHub Desktop.
Scrape emoji and create a golang file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
# Download the latest emoji test data published by the Unicode Consortium.
# The response is consumed further down to generate the Go source file.
sesi = requests.session()
site_uri = "https://unicode.org/Public/emoji/latest/emoji-test.txt"
# requests never times out unless told to; without a timeout a stalled
# connection would hang the script forever.
resp = sesi.get(site_uri, timeout=30)
def array_included(string, array):
    """Return True if any item of *array* occurs in *string*, ignoring case.

    Both sides are lower-cased before the substring test. An empty *array*
    yields False.
    """
    return any(needle.lower() in string.lower() for needle in array)
# Lines containing any of these substrings (case-insensitive, see
# array_included) are dropped entirely from the generated file.
_EXCLUDES = [
    " man, man",
    " woman, woman",
    " person, person",
    "transg",
]

# Characters that can occur in an emoji name but cannot be part of the
# generated Go identifier. Most are deleted; digits and the two keycap
# symbols are spelled out so the identifier stays purely alphabetic.
# NOTE(review): two further entries with visually empty keys used to follow
# "!". An empty key is a no-op for replacement, so they are omitted. If they
# held invisible characters (zero-width / bidi), restore them from the raw
# file.
_NAME_TRANSLATION = str.maketrans({
    ":": "",
    ",": "",
    "&": "",
    ".": "",
    "-": "",
    "_": "",
    "“": "",
    "”": "",
    # "#" is the hash sign and "*" is the star; keep them the right way round
    # so each keycap constant is named after its own symbol.
    "#": "Hash",
    "*": "Stars",
    "(": "",
    ")": "",
    " ": "",
    "’": "",
    "1": "One",
    "2": "Two",
    "3": "Three",
    "4": "Four",
    "5": "Five",
    "6": "Six",
    "7": "Seven",
    "8": "Eight",
    "9": "Nine",
    "0": "Zero",
    "!": "",
})


def _go_string_escape(text):
    """Return *text* escaped for use inside a Go interpreted string literal.

    Python's ``unicode-escape`` codec is used per character, except for
    U+0080..U+00FF: the codec renders those as ``\\xNN``, which Go reads as a
    single raw byte rather than a code point (yielding invalid UTF-8 for
    emoji such as the copyright sign). They are emitted as ``\\u00NN``.
    """
    pieces = []
    for ch in text:
        if 0x80 <= ord(ch) <= 0xFF:
            pieces.append(f"\\u{ord(ch):04x}")
        else:
            pieces.append(ch.encode("unicode-escape").decode("ascii"))
    return "".join(pieces)


def _go_identifier(words):
    """Build a Go identifier from the words of an emoji name.

    Each word is title-cased, the words are concatenated, and characters
    listed in ``_NAME_TRANSLATION`` are removed or spelled out.
    """
    return "".join(word.title() for word in words).translate(_NAME_TRANSLATION)


def _collect_go_lines(lines):
    """Turn the lines of emoji-test.txt into Go constant and map entries.

    Returns a ``(const_lines, map_lines)`` tuple of lists of strings, without
    the surrounding ``const (`` / ``var EmojiMap`` wrappers.

    Processing stops at the first line that cannot be parsed or whose name
    does not reduce to a purely alphabetic identifier; the offending value is
    printed so the replacement table can be extended.
    """
    const_lines = []
    map_lines = []
    for line in lines:
        if not line:
            continue

        # Remove junk.
        if array_included(line, _EXCLUDES):
            print(line)
            continue

        # "# group: ..." and "# subgroup: ..." headings become Go comments in
        # both the const block and the map.
        if line.startswith("#") and "group:" in line:
            heading = line.replace("#", "//")
            const_lines.append(f"\n\t{heading}\n\n")
            map_lines.append(f"\n\t{heading}\n\n")

        # Only fully-qualified data lines produce entries.
        if line.startswith("#") or "fully-qualified" not in line:
            continue

        # Data line shape: "<code points> ; <status> # <emoji> <version> <name>"
        try:
            _code_points, notes = line.split(";")
            _status, emoji_note = notes.split(" # ")
        except ValueError as err:
            print(line, err)
            break

        note_parts = emoji_note.split(" ")
        emoji = note_parts[0]
        # note_parts[1] is the version tag (e.g. "E1.0"); the name follows it.
        emoji_name = _go_identifier(note_parts[2:])
        if not emoji_name.isalpha():
            print(emoji_name)
            break

        escaped = _go_string_escape(emoji)
        const_lines.append(
            f'\t{emoji_name} Emoji = "{escaped}" // {emoji_note}\n'
        )
        map_lines.append(f'\t"{emoji_name}": {emoji_name},\n')
    return const_lines, map_lines


if resp.status_code == 200:
    const_lines, map_lines = _collect_go_lines(resp.text.split("\n"))

    # write const
    const_lines.insert(0, "const (\n")
    const_lines.append(")\n")

    # write map
    map_lines.insert(0, "var EmojiMap = map[string]Emoji{\n")
    map_lines.append("}\n")

    # The trailing comments contain the raw emoji, so the file must be written
    # as UTF-8 regardless of the platform's default encoding.
    with open("emoji.go", mode="w", encoding="utf-8") as f:
        f.write("package emoji\n\ntype Emoji string\n\n")
        f.write("\n\n")
        f.writelines(const_lines)
        f.writelines(map_lines)
else:
    print(f"download failed: HTTP {resp.status_code}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment