Skip to content

Instantly share code, notes, and snippets.

@ymattu
Created January 17, 2016 06:18
Show Gist options
  • Select an option

  • Save ymattu/db116c51cb55ee8ab8a5 to your computer and use it in GitHub Desktop.

Select an option

Save ymattu/db116c51cb55ee8ab8a5 to your computer and use it in GitHub Desktop.
ワードクラウド(Python3.5, Mac)
# coding:utf-8
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from bs4 import BeautifulSoup
import requests
import MeCab as mc
def mecab_analysis(text):
    """Extract content words from Japanese text using MeCab.

    The text is morphologically analysed and the surface form of every
    token whose top-level part of speech (the first comma-separated field
    of the MeCab feature string) is adjective, verb, noun or adverb is
    collected, in order of appearance.

    Args:
        text: The string to analyse.

    Returns:
        A list of surface-form strings; empty when no token qualifies.
    """
    # Adjective, verb, noun, adverb.
    content_word_types = {"形容詞", "動詞", "名詞", "副詞"}

    t = mc.Tagger("-Ochasen")
    # NOTE: some older Python 3 builds of the MeCab binding are reported to
    # return empty/garbled `surface` values from parseToNode() unless
    # parse() has been called once beforehand. Parsing an empty string is a
    # cheap, side-effect-free way to prime the tagger.
    t.parse("")
    node = t.parseToNode(text)

    output = []
    while node:
        # Skip the header and footer nodes, whose surface is empty.
        if node.surface != "":
            word_type = node.feature.split(",")[0]
            if word_type in content_word_types:
                output.append(node.surface)
        # The loop condition ends the walk once `next` yields None.
        node = node.next
    return output
def get_wordlist_from_QiitaURL(url, timeout=10):
    """Download a Qiita article and return its content words.

    The page is fetched over HTTP, parsed with the lxml parser, and the text
    of the first ``<section>`` element found under ``<body>`` is stripped of
    newlines and tabs before being passed to ``mecab_analysis``.

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The list of words returned by ``mecab_analysis``.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout,
            or an HTTP error status.
        ValueError: If the page has no ``<section>`` inside ``<body>``.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly instead of building a word cloud out of an error page.
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "lxml")
    body = soup.body
    section = body.section if body is not None else None
    if section is None:
        raise ValueError(
            "no <section> element found in the body of {}".format(url)
        )

    text = section.get_text().replace("\n", "").replace("\t", "")
    return mecab_analysis(text)
def create_wordcloud(text, font_path="/Library/Fonts/ヒラギノ角ゴ Pro W3.otf"):
    """Render a word cloud for ``text`` and show it in a matplotlib window.

    Args:
        text: The words to plot, joined into a single string.
        font_path: Path to the font file handed to ``WordCloud``. The
            default is a macOS Hiragino font location; pass a path that
            exists on your system (and covers the characters in ``text``)
            when running elsewhere.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    # Stop words: frequent auxiliary verbs, particles and fillers that
    # should not appear in the cloud.
    stop_words = {
        'てる', 'いる', 'なる', 'れる', 'する', 'ある', 'こと', 'これ', 'さん', 'して',
        'くれる', 'やる', 'くださる', 'そう', 'せる', 'した', '思う',
        'それ', 'ここ', 'ちゃん', 'くん', '', 'て', 'に', 'を', 'は', 'の', 'が', 'と', 'た', 'し', 'で',
        'ない', 'も', 'な', 'い', 'か', 'ので', 'よう', 'れ', 'さ', 'なっ',
    }

    wordcloud = WordCloud(
        background_color="white",
        font_path=font_path,
        width=900,
        height=500,
        stopwords=stop_words,
    ).generate(text)

    plt.figure(figsize=(15, 12))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
# Demo run: fetch one Qiita article, extract its words and plot them.
url = "http://qiita.com/t_saeko/items/2b475b8657c826abc114"
wordlist = get_wordlist_from_QiitaURL(url)
# create_wordcloud expects one string, so the words are joined with spaces.
space_separated_words = " ".join(wordlist)
create_wordcloud(space_separated_words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment