Skip to content

Instantly share code, notes, and snippets.

@gilillo32
Created December 10, 2024 21:14
Show Gist options
  • Save gilillo32/1f133af68673e92e3495d13c1caaf53a to your computer and use it in GitHub Desktop.
Save gilillo32/1f133af68673e92e3495d13c1caaf53a to your computer and use it in GitHub Desktop.
Homemade Spotify Wrapped
import json
from collections import defaultdict
"""
This script reads the streaming history data from the file 'streaming_history_combined.json' and processes it to find
the top 10 artists and tracks by count and by time, as well as the total listening time. The data is filtered to only
include tracks listened to in the year 2024 between January 1st and November 15th. This has been done to compare
the results with the data from the Spotify Wrapped 2024. After some tests and playing with the parameters, we can see
that the data from the Spotify Wrapped 2024 is not the same as the data from the streaming history. This can be due to
a variety of reasons: The criteria for counting or excluding playing time, the criteria for counting or excluding song
plays...
Until some angry Spotify developer speaks out or the algorithm becomes open source, we won't know the truth.
"""
def process_track(track, artist_count, artist_time, track_count, track_time, listening_time,
                  min_ms_played=30000):
    """Fold one streaming-history entry into the running statistics.

    The entry's play time is always added to the overall listening total.
    Entries played for less than ``min_ms_played`` milliseconds are then
    skipped, so they never reach the per-artist or per-track tallies.

    Args:
        track: One history entry (a dict) that must contain ``msPlayed``.
            When ``podcastName`` is absent or ``None`` the entry is read as
            a song (``artistName`` / ``trackName``); otherwise it is read as
            a podcast episode (``podcastName`` / ``episodeName``).
        artist_count: Mapping of artist or podcast name to number of plays.
        artist_time: Mapping of artist or podcast name to milliseconds played.
        track_count: Mapping of track or episode name to number of plays.
        track_time: Mapping of track or episode name to milliseconds played.
        listening_time: One-element list whose first item accumulates the
            total milliseconds played. A list is used so the update is
            visible to the caller.
        min_ms_played: Minimum play time, in milliseconds, for an entry to
            count as a play. Defaults to 30000 (30 seconds).

    The four mappings are updated with ``+=``, so they must supply ``0`` for
    unseen keys (for example ``collections.defaultdict(int)``).

    Raises:
        KeyError: If ``track`` lacks a key required for its kind of entry.
    """
    ms_played = track['msPlayed']

    # Every entry counts toward total listening time, even very short ones.
    listening_time[0] += ms_played
    if ms_played < min_ms_played:
        return

    if track.get('podcastName') is None:
        artist = track['artistName']
        song = track['trackName']
    else:
        # Podcasts are tallied alongside music: show as "artist", episode as "song".
        artist = track['podcastName']
        song = track['episodeName']

    artist_count[artist] += 1
    artist_time[artist] += ms_played
    track_count[song] += 1
    track_time[song] += ms_played
def _top_items(totals, limit=10):
    """Return the ``limit`` highest-valued ``(key, value)`` pairs of ``totals``, largest first."""
    return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)[:limit]


def main():
    """Print a listening report built from 'streaming_history_combined.json'.

    The file is loaded as JSON and walked as a collection of collections of
    history entries. Entries whose ``endTime`` date lies between 2024-01-01
    and 2024-11-15 (inclusive) are passed to ``process_track``. The report
    lists the top 10 artists and tracks, both by play count and by play
    time, followed by the total listening time in minutes.
    """
    artist_count = defaultdict(int)
    artist_time = defaultdict(int)
    track_count = defaultdict(int)
    track_time = defaultdict(int)
    # One-element list so process_track can update the total in place.
    listening_time = [0]

    # JSON text is UTF-8; decode it explicitly rather than with the platform's
    # locale encoding, which can mangle or reject non-ASCII names.
    with open('streaming_history_combined.json', encoding='utf-8') as file:
        data = json.load(file)

    for item in data:
        for track in item:
            # Assumes endTime starts with a YYYY-MM-DD date, so plain string
            # comparison orders the dates correctly.
            if '2024-01-01' <= track['endTime'][:10] <= '2024-11-15':
                process_track(track, artist_count, artist_time, track_count, track_time, listening_time)

    print("Top 10 artists by count:")
    for artist, count in _top_items(artist_count):
        print(f"{artist}: {count}")

    print("\nTop 10 artists by time:")
    for artist, ms_played in _top_items(artist_time):
        print(f"{artist}: {ms_played / 1000 / 60 / 60:.2f} hours")

    print("\nTop 10 tracks by count:")
    for track, count in _top_items(track_count):
        print(f"{track}: {count}")

    print("\nTop 10 tracks by time:")
    for track, ms_played in _top_items(track_time):
        print(f"{track}: {ms_played / 1000 / 60 / 60:.2f} hours")

    print(f"\nTotal listening time (minutes): {listening_time[0] / 1000 / 60:.2f}")
# Produce the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment