Last active
February 12, 2016 12:18
-
-
Save butsugiri/da8c4f719d6e7ba5a6a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Parse iTunes Library and Generate Word Cloud | |
""" | |
""" | |
Copy "iTunes Music Library.xml" to the same directory as this Python script and rename it to "iTunes.xml" (the file name the parser functions open).
""" | |
import matplotlib.pyplot as plt | |
from lxml import etree | |
from collections import defaultdict | |
from wordcloud import WordCloud | |
def parse_XML_by_PlayCount():
    """Sum play counts per artist from the iTunes library export.

    Reads ``./iTunes.xml``, visits every ``<dict>`` element in the document
    and, for each one, picks up the values that follow the ``Kind``,
    ``Play Count`` and ``Artist`` keys.  A ``<dict>`` is ignored when it
    contains a ``Podcast`` key, when its kind contains the substring
    ``"app"``, or when it has no artist.

    Returns:
        A list of ``(artist, total_play_count)`` tuples, one per artist.
        An artist whose entries carry no ``Play Count`` key still appears,
        with a total of 0.

    Raises:
        ValueError: if the value following a ``Play Count`` key is not an
            integer literal (raised by ``int``).

    Note:
        Relies on the Python 2 ``unicode`` builtin.
    """
    with open("./iTunes.xml", "r") as f:
        dict_nodes = etree.parse(f).xpath("//dict")

    tracked_keys = ("Kind", "Play Count", "Artist")
    totals = defaultdict(int)

    for node in dict_nodes:
        # All parsing state is local to one <dict>, so a skipped entry (app or
        # podcast) can never leak its artist or play count into the next one.
        kind = ""
        artist = None
        plays = 0
        is_podcast = False
        pending_key = None  # tracked <key> whose value element comes next

        for child in node:
            if child.tag == "key":
                if child.text == "Podcast":
                    is_podcast = True
                    break
                if child.text in tracked_keys:
                    pending_key = child.text
                    continue

            # Any other element is the value for the pending key, if one is set.
            if pending_key == "Kind":
                # An empty element has text None; keep `kind` a string so the
                # substring test below cannot raise.
                kind = child.text or ""
            elif pending_key == "Play Count":
                plays = int(child.text)
            elif pending_key == "Artist":
                # Avoid turning an empty element (text None) into the literal
                # artist name "None".
                artist = unicode(child.text) if child.text else None
            pending_key = None

        if is_podcast or "app" in kind or not artist:
            continue
        totals[artist] += plays

    return list(totals.items())
def parse_XML_by_NofSongs():
    """Count library entries per artist from the iTunes library export.

    Reads ``./iTunes.xml``, visits every ``<dict>`` element in the document
    and, for each one, picks up the values that follow the ``Artist`` and
    ``Kind`` keys.  A ``<dict>`` is ignored when it contains a ``Podcast``
    key, when its kind contains the substring ``"app"``, or when it has no
    artist.

    Returns:
        A list of ``(artist, number_of_entries)`` tuples, one per artist.

    Note:
        Relies on the Python 2 ``unicode`` builtin.
    """
    with open("./iTunes.xml", "r") as f:
        dict_nodes = etree.parse(f).xpath("//dict")

    song_counts = defaultdict(int)

    for node in dict_nodes:
        # State is kept per <dict> so nothing carries over between entries.
        kind = ""
        artist = None
        skip_entry = False
        expecting = None  # "Artist" or "Kind" when its value element is next

        for child in node:
            # Only <key> elements name a field.  Checking the tag (as
            # parse_XML_by_PlayCount does) keeps a *value* whose text happens
            # to be "Artist", "Kind" or "Podcast" from being mistaken for a key.
            if child.tag == "key":
                if child.text == "Podcast":
                    skip_entry = True
                    break
                if child.text in ("Artist", "Kind"):
                    expecting = child.text
                    continue

            if expecting == "Artist":
                # Avoid turning an empty element (text None) into the literal
                # artist name "None".
                artist = unicode(child.text) if child.text else None
            elif expecting == "Kind":
                # Keep `kind` a string so the substring test below cannot raise.
                kind = child.text or ""
            expecting = None

        if skip_entry or "app" in kind or not artist:
            continue
        song_counts[artist] += 1

    return list(song_counts.items())
if __name__ == "__main__":
    # Artist frequencies weighted by total play count and by number of songs.
    play_freqs = parse_XML_by_PlayCount()
    song_freqs = parse_XML_by_NofSongs()

    # Original author's note: reusing one WordCloud instance for both data
    # sets made playCountCloud == songsCountCloud evaluate to True, and no
    # workaround was known, so each data set gets its own instance.
    # NOTE(review): presumably generate_from_frequencies returns the instance
    # itself -- confirm against the wordcloud documentation.
    font_file = "/System/Library/Fonts/ヒラギノ角ゴシック W5.ttc"
    generators = [
        WordCloud(font_path=font_file, width=800, height=600) for _ in range(2)
    ]
    clouds = [
        generator.generate_from_frequencies(freqs)
        for generator, freqs in zip(generators, (play_freqs, song_freqs))
    ]

    # One figure per cloud with the axes hidden: first the cloud built from
    # play counts, then the one built from song counts.
    for cloud in clouds:
        axes = plt.figure(dpi=150).add_subplot(111)
        axes.imshow(cloud)
        axes.axis("off")

    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment