Created
September 7, 2017 13:21
-
-
Save askoufis/c832d87fbbb2258770c7d43f9bfd1854 to your computer and use it in GitHub Desktop.
Pulls kaomoji from http://kaomoji.n-at.me/kaomoji.html into a text file that can be parsed by the Google Japanese IME. Taken from https://gist.github.com/kakakaya/3b40a074a6dc7e8717154e85c0aa52e6 and added a few lines to make it work for me.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding=utf-8 | |
from bs4 import BeautifulSoup | |
import requests | |
import sys | |
reload(sys) | |
sys.setdefaultencoding('utf8') | |
soup = BeautifulSoup(requests.get("http://kaomoji.n-at.me/kaomoji.html").text, "html.parser") | |
kaomojis = filter(None, ['かおもじ\t'+i.getText()+'\t顔文字' if '\n' not in i.getText() else None for i in soup.find_all('span', class_='kaomoji')]) | |
with open('kaomoji_dic.txt', 'w') as f: | |
f.write('\n'.join(kaomojis)+'\n') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment