Last active
July 13, 2023 11:32
-
-
Save rvanbruggen/23a0eccbffa663a9203b8fea5be58468 to your computer and use it in GitHub Desktop.
Spotify Playlist importer, queries, and dashboard
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spotipy | |
from neo4j import GraphDatabase | |
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth | |
# ==================================== Configuration parameters ==================================== #

# --- Spotify credentials ---
# NOTE(review): user_id is not read by any code visible in this file.
user_id = "<<YOUR SPOTIFY USER ID>>"
client = "<<YOUR SPOTIFY CLIENT ID>>"
secret = "<<YOUR SPOTIFY CLIENT SECRET>>"

# --- Source playlist ---
# Public playlist whose tracks get_tracks() reads.
playlist_uri = "spotify:playlist:1eCqsRrwBAFc2lf5ZLGa5m"

# --- Neo4j connection ---
neo4j_url = "neo4j://localhost:7687"
neo4j_username = "<<YOUR NEO4J USERNAME>>"  # commonly 'neo4j'
neo4j_password = "<<YOUR NEO4J DB PASSWORD>>"

# --- OAuth-style settings ---
# NOTE(review): scope, redirect_uri and cache_path are not read by any code visible in this
# file; the client below authenticates with client credentials only.
scope = 'playlist-modify-private'
redirect_uri = 'http://localhost:8888/callback'
cache_path = "spotify_cache.tmp"

# NOTE(review): nothing visible in this file reads this flag; the loader always calls
# recreate_contraints(). Confirm whether it should gate that call.
create_constraints = True

# Shared Spotify client used by every get_* helper.
spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(client_id=client, client_secret=secret)
)

# ================================== Configuration parameters end ================================== #
def load_graph_using_spotify_api():
    """Rebuild the Neo4j graph from the configured Spotify playlist and run the GDS analytics.

    Steps, in order:
      1. open a Neo4j session and call recreate_contraints() (which also empties the database);
      2. fetch tracks, audio features, albums, artists and related artists through the
         module-level ``spotify`` client and write them as Track / Album / Artist / Genre nodes;
      3. link the nodes with TRACK_IN_ALBUM, TRACK_HAS_ARTIST, ARTIST_HAS_GENRE, WORKED_WITH,
         ALBUM_HAS_ARTIST and SPOTIFY_RELATES_TO relationships;
      4. run the GDS overlap-similarity, PageRank (three variants) and Louvain procedures,
         which write their results back onto the graph.

    Returns None. The session is closed when the function exits, including on error.
    """
    neo4j = create_neo4j_session(url=neo4j_url, user=neo4j_username, password=neo4j_password)
    try:
        print("dropping and creating constraints...")
        recreate_contraints(neo4j)

        print("creating tracks...")
        tracks = get_tracks()
        tracks = get_track_audio_features(tracks)
        neo4j.run("UNWIND $tracks as track CREATE (t:Track{id: track.id}) SET t = track",
                  parameters={'tracks': list(tracks.values())})

        print("creating albums...")
        albums = get_album_info(tracks)
        # Key the node on the UNWIND variable (album), not on the node being created.
        neo4j.run("UNWIND $albums as album CREATE (a:Album{id: album.id}) SET a = album",
                  parameters={'albums': list(albums.values())})

        print("creating artists...")
        artists = get_artist_info(tracks)
        # Same as above: the key comes from the UNWIND variable (artist).
        neo4j.run("UNWIND $artists as artist CREATE (a:Artist{id: artist.id}) SET a = artist",
                  parameters={'artists': list(artists.values())})

        print("finding related artists..")
        related_artists = get_related_artists(artists)
        # MERGE rather than CREATE: a related artist may already exist as a playlist artist.
        neo4j.run("""UNWIND $relatedartists as artist MERGE (a:Artist {id: artist.id}) SET a = artist """,
                  parameters={'relatedartists': list(related_artists.values())})
        # original_artist holds the id of the artist whose lookup returned this related artist.
        neo4j.run("""MATCH (a:Artist) WHERE EXISTS (a.original_artist) WITH a
            MATCH (a2:Artist{id: a.original_artist})
            MERGE (a)-[:SPOTIFY_RELATES_TO]->(a2)""")

        print("creating genres..")
        genres = get_genres(albums, artists)
        neo4j.run("UNWIND $genres as genre MERGE (g:Genre{name: genre})",
                  parameters={'genres': list(genres)})

        print("Linking tracks to albums, genres, and artists...")
        neo4j.run("MATCH (t:Track), (a:Album{id: t.album}) CREATE (t)-[:TRACK_IN_ALBUM]->(a);")
        neo4j.run("MATCH (t:Track) UNWIND t.artists as artist MATCH (a:Artist{id: artist}) CREATE (t)-[:TRACK_HAS_ARTIST]->(a)")
        neo4j.run("MATCH (a:Artist) UNWIND a.genres as genre MATCH (g:Genre{name: genre}) CREATE (a)-[:ARTIST_HAS_GENRE]->(g)")
        # id(a1)<id(a2) keeps one direction per artist pair and track.
        neo4j.run("MATCH (a1:Artist)<--(t:Track)-->(a2:Artist) WHERE id(a1)<id(a2) MERGE (a1)-[:WORKED_WITH {track:t.uri}]->(a2)")
        neo4j.run("MATCH (ar:Artist)<--(t:Track)-->(al:Album) MERGE (al)-[:ALBUM_HAS_ARTIST]->(ar)")

        print("Calculate artist similarity using GDS..")
        neo4j.run("""
            MATCH (item:`Artist`)-[:`ARTIST_HAS_GENRE`]->(category:`Genre`)
            WITH {item:id(item), categories: collect(distinct id(category))} as userData
            WITH collect(userData) as dataset
            CALL gds.alpha.similarity.overlap.write({
                data: dataset,
                weightproperty: null,
                nodeProjection: '*',
                writeProperty: 'score',
                writeRelationshipType: 'GDS_ARTIST_SIMILAR_OVERLAP',
                similarityCutoff: 0.05,
                degreeCutoff: 0 })
            YIELD nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100
            RETURN nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, p95""")

        print("Calculate artist pagerank-spotify using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
                nodeProjection: 'Artist',
                relationshipProjection: {
                    relType: {
                        type: 'SPOTIFY_RELATES_TO',
                        orientation: 'NATURAL',
                        properties: {}
                    }
                },
                relationshipWeightProperty: null,
                dampingFactor: 0.85,
                maxIterations: 20,
                writeProperty: 'pagerank-spotify'})
            """)

        print("Calculate artist pagerank-workedwith using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
                nodeProjection: 'Artist',
                relationshipProjection: {
                    relType: {
                        type: 'WORKED_WITH',
                        orientation: 'UNDIRECTED',
                        properties: {}
                    }
                },
                relationshipWeightProperty: null,
                dampingFactor: 0.85,
                maxIterations: 20,
                writeProperty: 'pagerank-workedwith'
            })
            """)

        print("Calculate artist pagerank-similarity using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
                nodeProjection: 'Artist',
                relationshipProjection: {
                    relType: {
                        type: 'GDS_ARTIST_SIMILAR_OVERLAP',
                        orientation: 'UNDIRECTED',
                        properties: {}
                    }
                },
                relationshipWeightProperty: null,
                dampingFactor: 0.85,
                maxIterations: 20,
                writeProperty: 'pagerank-similarity'
            })
            """)

        print("Calculate artist Louvain community using GDS..")
        # NOTE(review): 'valence' is presumably an audio feature copied onto Track nodes; nothing
        # visible here sets it on Artist nodes. Confirm seedProperty/nodeProperties are intended.
        neo4j.run("""
            CALL gds.louvain.write({
                nodeProjection: 'Artist',
                relationshipProjection: {
                    relType: {
                        type: 'SPOTIFY_RELATES_TO',
                        orientation: 'UNDIRECTED',
                        properties: {}
                    }
                },
                relationshipWeightProperty: null,
                includeIntermediateCommunities: false,
                seedProperty: 'valence',
                nodeProperties: [
                    'valence'
                ],
                writeProperty: 'louvain-community'})
            """)
    finally:
        # The session is created in this function, so it is released here as well.
        neo4j.close()
# NOTE(review): indentation was lost in this copy of the file. This is presumably a module-level
# statement that announces the run before the __main__ guard starts the load; confirm.
print("Starting the loading!")
def recreate_contraints(neo4j):
    """Empty the database and rebuild the uniqueness constraints used by the loader.

    Args:
        neo4j: an object with a ``run(query)`` method (the Neo4j session created by the loader).

    The function deletes every node and relationship, drops every constraint reported by
    ``CALL db.constraints``, and then creates unique constraints on Genre.name, Album.id,
    Artist.id and Track.id. Returns None.
    """
    # Delete the data first, so the unique constraints below are created on an empty database
    # and cannot fail on duplicates left behind by earlier data.
    neo4j.run("MATCH (n) DETACH DELETE n;")

    # Read all descriptions before issuing further queries on the same session.
    descriptions = [constraint['description'] for constraint in neo4j.run("CALL db.constraints")]
    for description in descriptions:
        neo4j.run("DROP " + description)

    constraint_statements = (
        "CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Album) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Artist) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (t:Track) ASSERT t.id IS UNIQUE",
    )
    for statement in constraint_statements:
        neo4j.run(statement)
def get_tracks():
    """Fetch every track of the configured playlist, keyed by track id.

    Reads the playlist named by the module-level ``playlist_uri`` through the module-level
    ``spotify`` client and follows the paging links until no ``next`` page remains, so the
    final page is included as well.

    Each stored track dict is flattened in place:
      * ``artists`` becomes a list of artist ids,
      * ``album`` becomes the album id,
      * every remaining dict-valued field is set to None.

    Playlist items without a track object, or whose track has no id, are skipped.

    Returns:
        dict mapping track id -> flattened track dict.
    """
    page = spotify.playlist(playlist_uri)['tracks']
    items = {}
    while page:
        for entry in page['items']:
            track = entry.get('track') if entry else None
            if not track or not track.get('id'):
                continue
            track['artists'] = [artist if isinstance(artist, str) else artist['id']
                                for artist in track['artists']]
            album = track['album']
            track['album'] = album if isinstance(album, str) else album['id']
            # Only existing keys are reassigned, so mutating while iterating is safe here.
            for field, value in track.items():
                if isinstance(value, dict):
                    track[field] = None
            items[track['id']] = track
        # Advance only after the current page is processed, so the last page is not dropped.
        page = spotify.next(page) if page['next'] else None
    return items
def get_track_audio_features(tracks, page_size=100):
    """Merge Spotify audio features into the given track dicts.

    Args:
        tracks: dict mapping track id -> track dict; the track dicts are updated in place.
        page_size: number of track ids sent per ``spotify.audio_features`` call.

    Every key of a returned feature entry except ``'type'`` is copied onto the matching
    track dict (so the track keeps its own ``'type'``). Entries returned as None are skipped.

    Returns:
        The same ``tracks`` dict that was passed in.
    """
    track_ids = list(tracks)  # built once; sliced per page below
    for start in range(0, len(track_ids), page_size):
        batch = track_ids[start:start + page_size]
        for track_features in spotify.audio_features(tracks=batch):
            if track_features is None:
                continue
            target = tracks[track_features['id']]
            for feature, value in track_features.items():
                if feature != 'type':
                    target[feature] = value
    return tracks
def get_album_info(tracks, page_size=20):
    """Fetch details for every album referenced by the given tracks.

    Args:
        tracks: dict mapping track id -> track dict whose ``'album'`` value is an album id.
        page_size: number of album ids sent per ``spotify.albums`` call.

    Each returned album dict is flattened in place:
      * ``artists``  -> list of artist ids,
      * ``images``   -> URL of the second image when there is one, otherwise of the only
                        image, or None when the album has no images,
      * ``tracks``   -> number of tracks,
      * ``copyrights`` -> number of copyright entries,
      * ``external_ids`` / ``external_urls`` -> None.

    Returns:
        dict mapping album id -> flattened album dict.
    """
    # De-duplicate while keeping first-seen order; skip tracks without an album id.
    album_ids = list(dict.fromkeys(
        track['album'] for track in tracks.values() if track['album']))

    all_albums = {}
    # Stepping by page_size never produces an empty batch, so no request is sent without ids.
    for start in range(0, len(album_ids), page_size):
        batch = album_ids[start:start + page_size]
        for album in spotify.albums(batch)['albums']:
            if album is None:
                continue
            album['artists'] = [artist['id'] for artist in album['artists']]
            images = album['images']
            album['images'] = images[min(1, len(images) - 1)]['url'] if images else None
            album['external_ids'] = None
            album['external_urls'] = None
            album_tracks = album['tracks']
            if isinstance(album_tracks, dict):
                # len() of a paging dict would count its keys, so read its 'total' instead.
                album['tracks'] = album_tracks.get('total', len(album_tracks.get('items', ())))
            else:
                album['tracks'] = len(album_tracks)
            album['copyrights'] = len(album['copyrights'])
            all_albums[album['id']] = album
    return all_albums
def get_artist_info(items, page_size=50):
    """Fetch details for every artist referenced by the given tracks.

    Args:
        items: dict mapping track id -> track dict whose ``'artists'`` value is a list of
            artist ids.
        page_size: number of artist ids sent per ``spotify.artists`` call.

    Each returned artist dict is flattened in place:
      * ``images``    -> URL of the second image when there is one, otherwise of the only
                         image; an empty image list is left untouched,
      * ``followers`` -> the ``'total'`` follower count,
      * ``external_urls`` -> None.

    Returns:
        dict mapping artist id -> flattened artist dict.
    """
    # Collect unique artist ids in first-seen order; skip missing ids.
    artist_ids = {}
    for track in items.values():
        for artist_id in track['artists']:
            if artist_id:
                artist_ids[artist_id] = None
    artist_ids = list(artist_ids)

    all_artists = {}
    # Stepping by page_size never produces an empty batch, so no request is sent without ids.
    for start in range(0, len(artist_ids), page_size):
        batch = artist_ids[start:start + page_size]
        for artist in spotify.artists(batch)['artists']:
            if artist is None:
                continue
            images = artist['images']
            if images:
                artist['images'] = images[min(1, len(images) - 1)]['url']
            artist['followers'] = artist['followers']['total']
            artist['external_urls'] = None
            all_artists[artist['id']] = artist
    return all_artists
def get_related_artists(items, page_size=50):
    """Look up the related artists of every given artist.

    Args:
        items: iterable of artist ids (the loader passes the artist dict, whose keys are ids).
        page_size: kept for backward compatibility. Related artists are requested one artist
            at a time, so this value does not affect the calls made.

    Each related-artist dict is flattened in place:
      * ``original_artist`` -> id of the artist whose lookup returned it,
      * ``images``    -> URL of the second image when there is one, otherwise of the only
                         image; an empty image list is left untouched,
      * ``followers`` -> the ``'total'`` follower count,
      * ``external_urls`` -> None.

    Returns:
        dict mapping related-artist id -> flattened dict. A related artist returned for
        several artists is stored once, carrying the ``original_artist`` of the last lookup.
    """
    new_artists = {}
    for artist_id in list(items):
        response = spotify.artist_related_artists(artist_id)
        for related_artist in response['artists']:
            related_artist['original_artist'] = artist_id
            images = related_artist['images']
            if images:
                related_artist['images'] = images[min(1, len(images) - 1)]['url']
            related_artist['followers'] = related_artist['followers']['total']
            related_artist['external_urls'] = None
            new_artists[related_artist['id']] = related_artist
    return new_artists
def get_genres(albums, artists):
    """Collect the distinct genre names found on the given albums and artists.

    Args:
        albums: dict mapping album id -> album dict with an optional ``'genres'`` list.
        artists: dict mapping artist id -> artist dict with an optional ``'genres'`` list.

    Returns:
        set of genre names. Entries whose ``'genres'`` is missing or None contribute nothing.
    """
    genres = set()
    for collection in (albums, artists):
        for entry in collection.values():
            genres.update(entry.get('genres') or ())
    return genres
def create_neo4j_session(url, user, password):
    """Connect to Neo4j at ``url`` with basic credentials and return a new session.

    The driver object is created here and not returned, so callers only hold the session.
    """
    credentials = (user, password)
    return GraphDatabase.driver(url, auth=credentials).session()
# Script entry point: run the full load only when this file is executed directly,
# then report completion.
if __name__ == '__main__':
    load_graph_using_spotify_api()
    print("Done!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// structure of the graph: node counts per label set, relationship counts per type
match (n)
return "Node" as Type,labels(n) as Name,count(n) as Count
union
match ()-[r]->()
return "Relationship" as Type,type(r) as Name, count(r) as Count;

// links between two artists (shortest paths between any "BRUCE" and any "TOM")
match (a1:Artist), (a2:Artist),
path = allshortestpaths ((a1)-[*]-(a2))
where toUpper(a1.name) contains "BRUCE"
and toUpper(a2.name) contains "TOM"
return path
limit 10;

// table of interesting artists, ranked by the pagerank over SPOTIFY_RELATES_TO
match (a:Artist)
return a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity
order by a.`pagerank-spotify` desc
limit 10;

// neighborhood of most important artists (paths of up to two hops)
match path = ((a:Artist)-[*..2]-(conn))
return path
order by a.`pagerank-spotify` desc
limit 10;

// some stats about number of tracks per artist
match (a:Artist)<--(t:Track)
return a.name as Artist, count(t) as NumberOfTracks
order by NumberOfTracks desc
limit 10;

// some stats about number of tracks per album
// count(distinct t): the trailing -->(a) hop yields one row per album artist,
// which would otherwise multiply the track count.
match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)
return ar.name as Artist, al.name as Album, count(distinct t) as NumberOfSongs
order by NumberOfSongs desc
limit 10;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"title": "Welcome to my Spotify Dashboard!", | |
"version": "1.0", | |
"editable": true, | |
"reports": [ | |
{ | |
"title": "Based on the following projects", | |
"width": 12, | |
"height": 4, | |
"type": "text", | |
"query": "* [Niels' Spotify Playlist Builder](https://nielsdejong.nl/neo4j%20projects/2020/09/23/spotify-playlist-builder.html)\n* [Niels' NeoDash project](https://nielsdejong.nl/neo4j%20projects/2020/11/16/neodash.html) - which enables this page!\n\n\n\n\nIn this Dashboard, we will show you how you can take a look at the small Spotify Graph that we created!\nYou will find all the code on [github of course](https://gist.github.com/rvanbruggen/23a0eccbffa663a9203b8fea5be58468).\n",
"page": 1, | |
"properties": [], | |
"parameters": "", | |
"refresh": 1 | |
}, | |
{ | |
"title": "Structure of the graph - queries:", | |
"width": 12, | |
"height": 4, | |
"type": "text", | |
"query": "Graphically:\n\n```\ncall db.schema.visualization()\n```\n\nOr as a table:\n```\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n```",
"page": 1, | |
"properties": [], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Structure of the graph", | |
"width": 6, | |
"height": 4, | |
"type": "graph", | |
"query": "call db.schema.visualization()", | |
"page": 24, | |
"properties": [ | |
"name", | |
"name", | |
"name", | |
"name" | |
], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Structure of the graph", | |
"width": 6, | |
"height": 4, | |
"type": "table", | |
"query": "// structure of the graph\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n", | |
"page": 1, | |
"properties": [], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Cypher queries:", | |
"width": 12, | |
"height": 8, | |
"type": "text", | |
"query": "#### Links between \"BRUCE\" and \"TOM\":\n```\nmatch (a1:Artist), (a2:Artist),\npath = allshortestpaths ((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10;\n```\n\n#### Table of interesting artists\n```\nmatch (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\n#### Neighborhood of most important artists\n```\nmatch path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\nSee below for all the results!\n", | |
"page": 1, | |
"properties": [], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Links between \"BRUCE\" and \"TOM\"", | |
"width": 12, | |
"height": 4, | |
"type": "graph", | |
"query": "match (a1:Artist), (a2:Artist), path = allshortestpaths((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10", | |
"page": 38, | |
"properties": [ | |
"name", | |
"name" | |
], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Table of Interesting Artists", | |
"width": 12, | |
"height": 4, | |
"type": "table", | |
"query": "match (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10", | |
"page": 1, | |
"properties": [], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Neighborhood of important artists", | |
"width": 12, | |
"height": 4, | |
"type": "graph", | |
"query": "match path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10", | |
"page": 11, | |
"properties": [ | |
"name" | |
], | |
"parameters": "", | |
"refresh": 0 | |
}, | |
{ | |
"title": "How many songs per Artist?", | |
"width": 12, | |
"height": 8, | |
"type": "bar", | |
"query": "match (a:Artist)<--(t:Track)\nreturn a.name as Artist, count(t) as NumberOfTracks\norder by NumberOfTracks desc\nlimit 10;\n", | |
"page": 18, | |
"properties": [], | |
"parameters": "{\"x\":\"Artist\",\"y\":10}", | |
"refresh": 0 | |
}, | |
{ | |
"title": "Number of Songs in an Album", | |
"width": 12, | |
"height": 8, | |
"type": "bar", | |
"query": "match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)\nreturn ar.name as Artist, al.name as Album, count(t) as NumberOfSongs\norder by count(t) desc\nlimit 10\n", | |
"page": 7, | |
"properties": [ | |
"Album", | |
"NumberOfSongs" | |
], | |
"parameters": "{\"x\":\"Album\",\"y\":10}", | |
"refresh": 0 | |
}, | |
{} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment