Last active
June 11, 2020 17:54
-
-
Save GrenderG/fd4eb4c73eadad219b0ecb5f9345070a to your computer and use it in GitHub Desktop.
Scrapes all emojis directly from http://emoji.codes/ and formats them for use in https://github.com/mrowa44/emojify
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
def start_requests():
    """Fetch every emoji category page and print its entries.

    The output is wrapped in ``emojis=(`` ... ``)``; each page is parsed
    with BeautifulSoup and handed to ``parse`` together with the category
    name taken from the URL's ``c=`` query value.

    Raises:
        requests.RequestException: if a page cannot be fetched, times out,
            or comes back with an HTTP error status.
    """
    urls = [
        'http://emoji.codes/family?c=people',
        'http://emoji.codes/family?c=nature',
        'http://emoji.codes/family?c=food',
        'http://emoji.codes/family?c=activity',
        'http://emoji.codes/family?c=travel',
        'http://emoji.codes/family?c=objects',
        'http://emoji.codes/family?c=symbols',
        'http://emoji.codes/family?c=flags',
        'http://emoji.codes/family?c=diversity',
    ]
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('emojis=(')
    for url in urls:
        # requests has no default timeout; without one a stalled server
        # would hang the script forever.
        req = requests.get(url, timeout=30)
        # Fail loudly instead of trying to parse an error page.
        req.raise_for_status()
        html = BeautifulSoup(req.text, 'html.parser')
        parse(html, url.split('=')[1])
    print(')')
def parse(html, title):
    """Print one ``["shortcode"]="code"`` line per emoji row in *html*.

    The element with id ``emoji-list`` is looked up in *html* and each of
    its ``tr`` rows is turned into a line: the row's ``id`` attribute
    (hyphen-separated code points) becomes a sequence of backslash-U
    escapes, and the text of the row's ``shortcode`` span becomes the key.
    For the ``diversity`` category, every ``tone1`` row additionally emits
    an entry for the tone-less shortcode mapped to the first code point.

    Rows without an ``id`` or without a shortcode span are skipped, and a
    missing ``emoji-list`` element is reported on stderr instead of
    raising ``AttributeError``.

    Args:
        html: parsed page (anything offering ``find(id=...)``).
        title: category name, printed in the header comment.
    """
    import sys  # local import: only needed for the warning below

    emoji_list = html.find(id='emoji-list')
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('\n\t# category: ' + title + '\n')
    if emoji_list is None:
        sys.stderr.write(
            'warning: no emoji-list element for category ' + title + '\n')
        return

    for emoji in emoji_list.find_all('tr'):
        emoji_id = emoji.get('id')
        label = emoji.select_one('span[class*=shortcode]')
        if not emoji_id or label is None:
            # Not an emoji row (no id or no shortcode span): nothing to emit.
            continue

        code = u'\\U' + emoji_id.replace('-', u'\\U')
        shortcode = label.text
        if title == 'diversity' and 'tone1' in shortcode:
            # code starts with a backslash, so element 1 of the split is the
            # first code point, i.e. the emoji without its tone modifier.
            base_code = u'\\' + code.split('\\')[1]
            print(u'\t["' + shortcode.replace('_tone1', '') +
                  u'"]="' + base_code + u'"')
        print(u'\t["' + shortcode + u'"]="' + code + u'"')
if __name__ == '__main__':
    # Scrape only when executed as a script, not when imported as a module.
    start_requests()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
# Next free slot in the parallel keys[]/values[] arrays; advanced by parse()
# for every entry it prints so indices keep counting across categories.
curr_index = 0
def start_requests():
    """Fetch every emoji category page and print its entries via ``parse``.

    Each page is parsed with BeautifulSoup and handed to ``parse`` together
    with the category name taken from the URL's ``c=`` query value.

    Raises:
        requests.RequestException: if a page cannot be fetched, times out,
            or comes back with an HTTP error status.
    """
    urls = [
        'http://emoji.codes/family?c=people',
        'http://emoji.codes/family?c=nature',
        'http://emoji.codes/family?c=food',
        'http://emoji.codes/family?c=activity',
        'http://emoji.codes/family?c=travel',
        'http://emoji.codes/family?c=objects',
        'http://emoji.codes/family?c=symbols',
        'http://emoji.codes/family?c=flags',
        'http://emoji.codes/family?c=diversity',
    ]
    for url in urls:
        # requests has no default timeout; without one a stalled server
        # would hang the script forever.
        req = requests.get(url, timeout=30)
        # Fail loudly instead of trying to parse an error page.
        req.raise_for_status()
        html = BeautifulSoup(req.text, 'html.parser')
        parse(html, url.split('=')[1])
def parse(html, title):
    """Print one ``keys[i]='shortcode'; values[i]='code';`` line per emoji row.

    The element with id ``emoji-list`` is looked up in *html* and each of
    its ``tr`` rows is turned into a line: the row's ``id`` attribute
    (hyphen-separated code points) becomes a sequence of backslash-U
    escapes, and the text of the row's ``shortcode`` span becomes the key.
    For the ``diversity`` category, every ``tone1`` row additionally emits
    an entry for the tone-less shortcode mapped to the first code point.
    The module-level ``curr_index`` supplies the array index and is
    incremented after every printed entry.

    Rows without an ``id`` or without a shortcode span are skipped, and a
    missing ``emoji-list`` element is reported on stderr instead of
    raising ``AttributeError``.

    Args:
        html: parsed page (anything offering ``find(id=...)``).
        title: category name, printed in the header comment.
    """
    global curr_index
    import sys  # local import: only needed for the warning below

    entry = u"keys[{0}]='{1}'; values[{0}]='{2}';"

    emoji_list = html.find(id='emoji-list')
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('# category: ' + title + '\n')
    if emoji_list is None:
        sys.stderr.write(
            'warning: no emoji-list element for category ' + title + '\n')
        rows = []
    else:
        rows = emoji_list.find_all('tr')

    for emoji in rows:
        emoji_id = emoji.get('id')
        label = emoji.select_one('span[class*=shortcode]')
        if not emoji_id or label is None:
            # Not an emoji row (no id or no shortcode span): nothing to emit.
            continue

        code = u'\\U' + emoji_id.replace('-', u'\\U')
        shortcode = label.text
        if title == 'diversity' and 'tone1' in shortcode:
            # code starts with a backslash, so element 1 of the split is the
            # first code point, i.e. the emoji without its tone modifier.
            base_code = u'\\' + code.split('\\')[1]
            print(entry.format(
                curr_index, shortcode.replace('_tone1', ''), base_code))
            curr_index += 1
        print(entry.format(curr_index, shortcode, code))
        curr_index += 1
    # Blank separator after the category's entries.
    print('\n')
if __name__ == '__main__':
    # Scrape only when executed as a script, not when imported as a module.
    start_requests()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
pip install beautifulsoup4
pip install requests
To redirect the output to a file instead of the terminal, run:
python emoji_scrap.py > list.txt