graph226 · August 16, 2016 08:43
diff --git a/line_visualizer.py b/line_visualizer.py
 #coding: utf-8
 import csv
 import MeCab
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 import re

 # Noun surface forms that get_nouns() excludes from its result.
 # NOTE(review): "の" appears twice; the duplicate is harmless for `in` checks.
 STOP_WORDS = "の もの こと よう これ 一 http:// の 笑 ω 物 とき ら 画像 Sticker Photo".split()
 # Values compared against the second comma-separated field of a MeCab feature
 # string; get_nouns() skips nouns whose second field is one of these.
 STOP_NOUN_DETAILS = "数 接続助詞 接尾 代名詞 非自立 副詞可能".split()

 def get_nouns(string):
    """Return the noun surfaces MeCab finds in *string*, in order of appearance.

    A token is kept when the first field of its feature string is "名詞", the
    second field is not listed in STOP_NOUN_DETAILS, and the surface form is
    not listed in STOP_WORDS.
    """
    # The tagger and the converted text stay bound to locals for the whole walk.
    mecab = MeCab.Tagger()
    source = str(string)
    current = mecab.parseToNode(source)

    found = []
    while current:
        fields = current.feature.split(",")
        part_of_speech, sub_category = fields[0], fields[1]
        surface = current.surface
        rejected = (
            part_of_speech != "名詞"
            or sub_category in STOP_NOUN_DETAILS
            or surface in STOP_WORDS
        )
        if not rejected:
            found.append(surface)
        current = current.next
    return found

 def main():
    """Show a word cloud of the nouns used in the chat log ``talk.txt``.

    Reads ``talk.txt`` (relative to the current working directory), keeps the
    text of every line recognised as a post, collects the nouns of each post
    with get_nouns(), and displays the generated word cloud with matplotlib.
    The font path passed to WordCloud is hard-coded.
    """
    fpath = "/Library/Fonts/Yu Gothic Medium.otf"

    # Use a context manager so the file handle is closed deterministically.
    with open('talk.txt', 'r') as talk_file:
        lines = talk_file.readlines()

    # Raw strings hand the escapes (\d, \t) to the regex engine untouched; the
    # patterns match exactly what the non-raw literals matched.
    # Prefix of a regular post: two digits, ":", two digits, a tab, then
    # everything up to the last tab on the line (".*" is greedy).
    usual_post = re.compile(r"\d\d:\d\d\t.*\t")
    # Any line that contains a double quote.
    quoted_line = re.compile(r'.*"')

    posts = []
    for line in lines:
        usual = usual_post.match(line)
        if usual:
            # Keep only what follows the matched prefix.
            posts.append(line[usual.end():].rstrip())
        elif quoted_line.match(line):
            posts.append(line.rstrip())
        # Every other line is ignored.

    nouns_all = []
    for post in posts:
        nouns_all.extend(get_nouns(post))

    lst_wordcloud = " ".join(nouns_all)
    # Decode only when the joined text is a byte string (the Python 2 case).
    # On Python 3 it is already text, and str has no .decode() method.
    if isinstance(lst_wordcloud, bytes):
        lst_wordcloud = lst_wordcloud.decode('utf-8')

    wordcloud = WordCloud(
        background_color="white",
        font_path=fpath,
        width=1280,
        height=720).generate(lst_wordcloud)

    plt.figure(figsize=(16, 9))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()


 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	#coding: utf-8
	import csv
	import MeCab
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	import re

	# Noun surface forms that get_nouns() excludes from its result.
	# The words must be separated by spaces so split() yields one entry per word.
	STOP_WORDS = "の もの こと よう これ 一 http:// の 笑 ω 物 とき ら 画像 Sticker Photo".split()
	# Values compared against the second comma-separated field of a MeCab feature
	# string; get_nouns() skips nouns whose second field is one of these.
	STOP_NOUN_DETAILS = "数 接続助詞 接尾 代名詞 非自立 副詞可能".split()

	def get_nouns(string):
	    """Return the noun surfaces MeCab finds in *string*, in order of appearance.

	    A token is kept when the first field of its feature string is "名詞", the
	    second field is not listed in STOP_NOUN_DETAILS, and the surface form is
	    not listed in STOP_WORDS.
	    """
	    # The tagger and the converted text stay bound to locals for the whole walk.
	    tagger = MeCab.Tagger()
	    text = str(string)
	    node = tagger.parseToNode(text)

	    nouns = []
	    while node:
	        # Split the feature string once and reuse the fields.
	        word_detail = node.feature.split(",")
	        pos = word_detail[0]
	        noun_detail = word_detail[1]
	        word = node.surface
	        if (
	            pos == "名詞"
	            and noun_detail not in STOP_NOUN_DETAILS
	            and word not in STOP_WORDS
	        ):
	            nouns.append(word)
	        node = node.next
	    return nouns

	def main():
	    """Show a word cloud of the nouns used in the chat log ``talk.txt``.

	    Reads ``talk.txt`` (relative to the current working directory), keeps the
	    text of every line recognised as a post, collects the nouns of each post
	    with get_nouns(), and displays the generated word cloud with matplotlib.
	    The font path passed to WordCloud is hard-coded.
	    """
	    fpath = "/Library/Fonts/Yu Gothic Medium.otf"

	    # Use a context manager so the file handle is closed deterministically.
	    with open('talk.txt', 'r') as talk_file:
	        lines = talk_file.readlines()

	    # Raw strings hand the escapes (\d, \t) to the regex engine untouched.
	    # Prefix of a regular post: two digits, ":", two digits, a tab, then
	    # everything up to the last tab on the line (".*" is greedy).
	    usual_post = re.compile(r"\d\d:\d\d\t.*\t")
	    # Any line that contains a double quote.
	    quoted_line = re.compile(r'.*"')

	    posts = []
	    for line in lines:
	        usual = usual_post.match(line)
	        if usual:
	            # Keep only what follows the matched prefix.
	            posts.append(line[usual.end():].rstrip())
	        elif quoted_line.match(line):
	            posts.append(line.rstrip())
	        # Every other line is ignored.

	    nouns_all = []
	    for post in posts:
	        nouns_all.extend(get_nouns(post))

	    lst_wordcloud = " ".join(nouns_all)
	    # Decode only when the joined text is a byte string (the Python 2 case).
	    # On Python 3 it is already text, and str has no .decode() method.
	    if isinstance(lst_wordcloud, bytes):
	        lst_wordcloud = lst_wordcloud.decode('utf-8')

	    wordcloud = WordCloud(
	        background_color="white",
	        font_path=fpath,
	        width=1280,
	        height=720).generate(lst_wordcloud)

	    plt.figure(figsize=(16, 9))
	    plt.imshow(wordcloud)
	    plt.axis("off")
	    plt.show()


	# Run main() only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()