Created
December 10, 2024 21:14
-
-
Save gilillo32/1f133af68673e92e3495d13c1caaf53a to your computer and use it in GitHub Desktop.
Homemade Spotify Wrapped
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from collections import defaultdict | |
""" | |
This script reads the streaming history data from the file 'streaming_history_combined.json' and processes it to find
the top 10 artists and tracks by count and by time, as well as the total listening time. The data is filtered to only
include tracks listened to in the year 2024 between January 1st and November 15th. This has been done to compare
the results with the data from the Spotify Wrapped 2024. After some tests and playing with the parameters, we can see
that the data from the Spotify Wrapped 2024 is not the same as the data from the streaming history. This can be due to
a variety of reasons: the criteria for counting or excluding playing time, the criteria for counting or excluding song
plays...
Until some angry Spotify developer speaks out or the algorithm becomes open source, we won't know the truth.
""" | |
def process_track(track, artist_count, artist_time, track_count, track_time, listening_time,
                  *, min_ms_played=30000):
    """Fold one streaming-history record into the running tallies.

    Every record adds its ``msPlayed`` to ``listening_time[0]``. Records played
    for less than ``min_ms_played`` milliseconds are otherwise ignored; the
    rest increment the per-artist and per-track play counts and accumulated
    play time.

    Args:
        track: Mapping with ``msPlayed`` plus either ``artistName`` and
            ``trackName`` or, when ``podcastName`` is present and not None,
            ``podcastName`` and ``episodeName``.
        artist_count: Mutable mapping of artist (or podcast) name to play
            count; must supply a default of 0 for new keys (e.g.
            ``defaultdict(int)``).
        artist_time: Same shape as ``artist_count``, accumulating milliseconds.
        track_count: Mutable mapping of track (or episode) name to play count.
        track_time: Same shape as ``track_count``, accumulating milliseconds.
        listening_time: One-element list used as a mutable total of
            milliseconds played.
        min_ms_played: Minimum play duration, in milliseconds, for a record to
            count towards the per-artist and per-track tallies. Defaults to
            30000, the threshold this script has always used.

    Raises:
        KeyError: If ``track`` lacks one of the keys described above.
    """
    ms_played = track['msPlayed']

    # The grand total includes short plays; only the rankings skip them.
    listening_time[0] += ms_played
    if ms_played < min_ms_played:
        return

    podcast = track.get('podcastName')
    if podcast is None:
        artist = track['artistName']
        song = track['trackName']
    else:
        # Podcast episodes are ranked alongside music: show as "artist",
        # episode as "track".
        artist = podcast
        song = track['episodeName']

    artist_count[artist] += 1
    artist_time[artist] += ms_played
    track_count[song] += 1
    track_time[song] += ms_played
def _top_entries(totals, limit=10):
    """Return the ``limit`` largest ``(name, value)`` pairs of ``totals``, highest first."""
    return sorted(totals.items(), key=lambda entry: entry[1], reverse=True)[:limit]


def main():
    """Load the combined streaming history and print the listening summary.

    Reads 'streaming_history_combined.json' from the current directory, feeds
    every record whose ``endTime`` date falls between 2024-01-01 and
    2024-11-15 (inclusive) to ``process_track``, then prints the top 10
    artists and tracks by play count and by play time, followed by the total
    listening time in minutes.
    """
    artist_count = defaultdict(int)
    artist_time = defaultdict(int)
    track_count = defaultdict(int)
    track_time = defaultdict(int)
    listening_time = [0]  # one-element list so process_track can update it in place

    # JSON is UTF-8 by specification; without an explicit encoding the platform
    # locale is used, which can fail or garble non-ASCII artist/track names.
    with open('streaming_history_combined.json', encoding='utf-8') as file:
        data = json.load(file)

    # The file is iterated as a sequence of sequences of records.
    for item in data:
        for track in item:
            # Plain string comparison on the first 10 characters; assumes
            # endTime starts with an ISO 'YYYY-MM-DD' date -- TODO confirm.
            if '2024-01-01' <= track['endTime'][:10] <= '2024-11-15':
                process_track(track, artist_count, artist_time, track_count, track_time, listening_time)

    print("Top 10 artists by count:")
    for artist, count in _top_entries(artist_count):
        print(f"{artist}: {count}")

    print("\nTop 10 artists by time:")
    for artist, ms in _top_entries(artist_time):
        print(f"{artist}: {ms / 1000 / 60 / 60:.2f} hours")

    print("\nTop 10 tracks by count:")
    for track, count in _top_entries(track_count):
        print(f"{track}: {count}")

    print("\nTop 10 tracks by time:")
    for track, ms in _top_entries(track_time):
        print(f"{track}: {ms / 1000 / 60 / 60:.2f} hours")

    print(f"\nTotal listening time (minutes): {listening_time[0] / 1000 / 60:.2f}")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment