Last active
June 16, 2019 12:58
-
-
Save hannes/a5e5388c412598b4c13dcf482761acfe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import itertools
import json
import re
import urllib.parse
import urllib.request

import pandas as pd
import spotipy
# Spotify credentials and target playlist.
#
# You will need to create a Spotify app and add the credentials below.
# Also create a public Spotify playlist and get its ID (the last part of its
# URI).  A token with the "playlist-modify-public" scope can be obtained
# interactively, for example:
#
#   import spotipy.util as util
#   token = util.prompt_for_user_token('hfmuehleisen',"playlist-modify-public",client_id='XXX',client_secret='XXX',redirect_uri='http://example.com/callback')
#
# NOTE: replace the 'XXX' placeholders locally; do not commit real secrets.
token = 'XXX'     # access token handed to the Spotify client
user = 'XXX'      # Spotify user passed along when adding tracks
playlist = 'XXX'  # ID of the playlist that receives the tracks

# Spotify Web API client used for track search and playlist updates.
sp = spotipy.Spotify(auth=token)

# Matches article titles of the form "STS-<digits>", e.g. "STS-107".
sts = re.compile(r"^STS-\d+$")
# Matches section headings that start with "Wake-up", e.g. "Wake-up calls".
wuc = re.compile(r"^Wake-up.*$")
def jsonify(url):
    """Fetch *url* and return its response body parsed as JSON.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The decoded JSON document (whatever ``json.loads`` produces for the
        payload, typically a ``dict``).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        ValueError: If the body is not valid JSON (``json.JSONDecodeError``).
    """
    # The context manager closes the response as soon as the body has been
    # read instead of leaving the connection open until garbage collection.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode())
def _wiki_url(params):
    """Return a Wikipedia API URL for *params*, always requesting JSON output."""
    # urlencode percent-escapes every value, so page titles containing spaces
    # or other reserved characters still produce a valid URL.
    query = urllib.parse.urlencode(dict(params, format="json"))
    return "https://en.wikipedia.org/w/api.php?" + query


# get all wiki pages from category 'Space Shuttle missions'
cat = jsonify(_wiki_url({
    "action": "query",
    "list": "categorymembers",
    "cmtitle": "Category:Space_Shuttle_missions",
    "cmlimit": 200,
}))
for page in cat['query']['categorymembers']:
    # make sure they are named 'STS-XXX'
    if not sts.match(page["title"]):
        continue

    # get all sections from those articles
    sections = jsonify(_wiki_url({
        "action": "parse",
        "page": page["title"],
        "prop": "sections",
    }))
    for sec in sections['parse']['sections']:
        # find the sections named 'Wake-up calls'
        if not wuc.match(sec['line']):
            continue
        print(page["title"])

        # get the content of that section and parse table
        table_s = jsonify(_wiki_url({
            "action": "parse",
            "page": page["title"],
            "section": sec["index"],
            "prop": "text",
        }))
        html = table_s['parse']['text']['*']
        try:
            # Wrap the markup in a file-like object: passing literal HTML
            # strings to read_html is deprecated in recent pandas releases.
            frames = pd.read_html(io.StringIO(html))
        except ValueError:
            # read_html raises ValueError when the markup contains no table;
            # skip this section rather than aborting the whole run.
            print("no table found, skipping section")
            continue
        # The first row of the first table is dropped, as in the original.
        # NOTE(review): presumably that row is the table heading; if the
        # installed pandas already turns the heading into column labels this
        # drops the first song instead - confirm against a real article.
        tables = frames[0].iloc[1:]

        tracks = []
        # Plain tuples give positional access to the second and third column
        # regardless of how pandas labelled the columns.
        for row in tables.itertuples(index=False, name=None):
            title, artist = row[1], row[2]
            if pd.isna(title) or pd.isna(artist):
                # An empty cell would otherwise be searched as "nan".
                continue
            print(title, artist)
            # search Spotify API for the title and artist of the listed music
            # and collect their IDs
            results = sp.search(q="%s artist:%s" % (title, artist), limit=1)
            items = results['tracks']['items']
            if items:
                tracks.append(items[0]['id'])

        if tracks:
            sp.user_playlist_add_tracks(user, playlist, tracks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment