Last active
May 31, 2016 12:04
-
-
Save graph226/6fa5f45d1b521ae1c606f69aaa09318d to your computer and use it in GitHub Desktop.
ワードクラウド
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding: utf-8 | |
import csv | |
import MeCab | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
# Surface forms that get_nouns() drops even when MeCab tags them as nouns.
# (The original literal listed "の" twice; the duplicate entry was removed.)
STOP_WORDS = "の もの こと よう これ 一 http:// 笑 ω 物 とき ら".split()
# Values of the second MeCab feature field for which get_nouns() drops the
# noun regardless of its surface form.
STOP_NOUN_DETAILS = "数 接続助詞 接尾 代名詞 非自立 副詞可能".split()
def csv_reader(data_path):
    """Read all rows of the CSV file at *data_path*.

    The file is parsed completely inside a ``with`` block, so the file handle
    is closed before this function returns instead of being left open for the
    lifetime of a lazy ``csv.reader``.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A single-pass iterator over the rows; each row is a list of strings.
        It supports ``next()`` and ``for`` loops like the ``csv.reader``
        object returned previously.

    Raises:
        IOError / OSError: If the file cannot be opened.
    """
    with open(data_path, 'r') as csv_file:
        rows = list(csv.reader(csv_file))
    # Hand back an iterator (not the list) so callers that relied on the
    # reader's one-shot iteration protocol keep working.
    return iter(rows)
def get_nouns(string):
    """Return the nouns MeCab finds in *string*, minus the stop lists.

    A node is kept when its first feature field is "名詞" (noun), its second
    feature field is not in STOP_NOUN_DETAILS, and its surface form is not in
    STOP_WORDS.

    Args:
        string: Text to analyse; it is passed through ``str()`` before parsing.

    Returns:
        A list of surface strings, in the order MeCab produced them.
    """
    # Constructing a Tagger is repeated work that does not depend on the
    # input, so build it on the first call and reuse it afterwards.
    tagger = getattr(get_nouns, "_tagger", None)
    if tagger is None:
        tagger = MeCab.Tagger('-d /opt/brew/lib/mecab/dic/mecab-ipadic-neologd')
        get_nouns._tagger = tagger

    text = str(string)
    nouns = []
    node = tagger.parseToNode(text)
    while node:
        features = node.feature.split(",")
        word = node.surface
        # Nodes whose feature string has fewer than two fields cannot be
        # classified, so they are skipped rather than raising IndexError.
        if (
            len(features) >= 2
            and features[0] == "名詞"
            and features[1] not in STOP_NOUN_DETAILS
            and word not in STOP_WORDS
        ):
            nouns.append(word)
        node = node.next
    return nouns
def main(data_path='', font_path="/Library/Fonts/Yu Gothic Medium.otf"):
    """Build and display a word cloud from the first column of a CSV file.

    Every non-empty row's first cell is run through get_nouns(); the collected
    nouns are joined with spaces and handed to WordCloud, and the resulting
    image is shown with matplotlib.

    Args:
        data_path: Path of the CSV file to read. The default is the empty
            string the script originally hard-coded, so callers should pass a
            real path.
        font_path: Font file passed to WordCloud as ``font_path``.
    """
    nouns_all = []
    for row in csv_reader(data_path):
        if not row:
            # csv yields an empty list for a blank line; there is no first
            # column to read, so skip it instead of raising IndexError.
            continue
        nouns_all.extend(get_nouns(row[0]))

    lst_wordcloud = " ".join(nouns_all)
    if isinstance(lst_wordcloud, bytes):
        # Only byte strings need decoding (the Python 2 case); on Python 3
        # the joined text is already str, which has no .decode().
        lst_wordcloud = lst_wordcloud.decode('utf-8')

    wordcloud = WordCloud(
        background_color="white",
        font_path=font_path,
        width=2880,
        height=1800,
    ).generate(lst_wordcloud)

    plt.figure(figsize=(29, 18))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment