gilillo32 · December 10, 2024 21:14
diff --git a/homemade-wrapped.py b/homemade-wrapped.py
 import json
 from collections import defaultdict

 """
 This script reads the streaming history data from the file 'streaming_history_combined.json' and processes it to find 
 the top 10 artists and tracks by count and by time, as well as the total listening time. The data is filtered to only
 include tracks listened to in the year 2024 between January 1st and November 15th. This has been done to compare
 the results with the data from the Spotify Wrapped 2024. After some tests and playing with the parameters, we can see
 that the data from the Spotify Wrapped 2024 is not the same as the data from the streaming history. This can be due to 
 a variety of reasons: The criteria for counting or excluding playing time, the criteria for counting or excluding song
 plays...
 Until some angry Spotify developer speaks out or the algorithm becomes open source, we won't know the truth.
 """

 def process_track(track, artist_count, artist_time, track_count, track_time, listening_time,
                   min_ms_played=30000):
    """Fold one play record into the running totals.

    The record's play time is always added to ``listening_time[0]``. A record
    played for less than ``min_ms_played`` milliseconds is then dropped and
    does not touch the per-artist / per-track tallies.

    Args:
        track: Mapping with 'msPlayed' plus either 'artistName'/'trackName'
            or, when 'podcastName' is present and not None,
            'podcastName'/'episodeName'.
        artist_count: Mapping of artist (or podcast) name to number of counted
            plays; updated in place with ``+=``, so unseen keys must have a
            default (main() passes ``defaultdict(int)``).
        artist_time: Mapping of artist (or podcast) name to summed msPlayed;
            updated in place.
        track_count: Mapping of track (or episode) name to number of counted
            plays; updated in place.
        track_time: Mapping of track (or episode) name to summed msPlayed;
            updated in place.
        listening_time: One-element list used as a mutable accumulator for
            the total msPlayed of every record passed in.
        min_ms_played: Minimum msPlayed for a record to enter the rankings.
            Defaults to 30000, the cut-off that was previously hard-coded.

    Raises:
        KeyError: If the record lacks 'msPlayed' or the name keys it needs.
    """
    ms_played = track['msPlayed']

    # The grand total includes short plays; only the rankings skip them.
    listening_time[0] += ms_played
    if ms_played < min_ms_played:
        return

    podcast = track.get('podcastName')
    if podcast is None:
        artist, song = track['artistName'], track['trackName']
    else:
        # Podcast episodes are ranked alongside music: show name as the
        # "artist", episode name as the "track".
        artist, song = podcast, track['episodeName']

    artist_count[artist] += 1
    artist_time[artist] += ms_played
    track_count[song] += 1
    track_time[song] += ms_played


 def main():
    """Print top-10 rankings and total listening time for the 2024 window.

    Reads 'streaming_history_combined.json' from the current directory. The
    file is iterated two levels deep (a sequence of sequences of play
    records); every record whose 'endTime' date prefix lies between
    2024-01-01 and 2024-11-15 inclusive is passed to process_track(). The
    function then prints the ten highest artists and tracks by play count and
    by play time (hours), followed by the total listening time in minutes.

    Raises:
        OSError: If the history file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    artist_count = defaultdict(int)
    artist_time = defaultdict(int)
    track_count = defaultdict(int)
    track_time = defaultdict(int)
    listening_time = [0]  # one-element list so process_track can mutate it

    # JSON text is UTF-8 by specification; without an explicit encoding,
    # open() uses the locale's encoding, which can mangle or reject
    # non-ASCII artist and track names on some platforms.
    with open('streaming_history_combined.json', encoding='utf-8') as file:
        data = json.load(file)

    # The file handle is no longer needed once the JSON is parsed.
    for item in data:
        for track in item:
            # 'YYYY-MM-DD' prefixes compare chronologically as plain strings.
            if '2024-01-01' <= track['endTime'][:10] <= '2024-11-15':
                process_track(track, artist_count, artist_time, track_count, track_time, listening_time)

    def top_10(totals):
        """Return the ten (name, value) pairs with the largest values."""
        return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)[:10]

    print("Top 10 artists by count:")
    for artist, count in top_10(artist_count):
        print(f"{artist}: {count}")

    print("\nTop 10 artists by time:")
    for artist, ms in top_10(artist_time):
        print(f"{artist}: {ms / 1000 / 60 / 60:.2f} hours")

    print("\nTop 10 tracks by count:")
    for track, count in top_10(track_count):
        print(f"{track}: {count}")

    print("\nTop 10 tracks by time:")
    for track, ms in top_10(track_time):
        print(f"{track}: {ms / 1000 / 60 / 60:.2f} hours")

    print(f"\nTotal listening time (minutes): {listening_time[0] / 1000 / 60:.2f}")

 # Entry-point guard: call main() only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == '__main__':
    main()
	import json
	from collections import defaultdict

	"""
	This script reads the streaming history data from the file 'streaming_history_combined.json' and processes it to find
	the top 10 artists and tracks by count and by time, as well as the total listening time. The data is filtered to only
	include tracks listened to in the year 2024 between January 1st and November 15th. This has been done to compare
	the results with the data from the Spotify Wrapped 2024. After some tests and playing with the parameters, we can see
	that the data from the Spotify Wrapped 2024 is not the same as the data from the streaming history. This can be due to
	a variety of reasons: The criteria for counting or excluding playing time, the criteria for counting or excluding song
	plays...
	Until some angry Spotify developer speaks out or the algorithm becomes open source, we won't know the truth.
	"""

	def process_track(track, artist_count, artist_time, track_count, track_time, listening_time,
	                  min_ms_played=30000):
	    """Fold one play record into the running totals.

	    The record's play time is always added to ``listening_time[0]``. A record
	    played for less than ``min_ms_played`` milliseconds is then dropped and
	    does not touch the per-artist / per-track tallies.

	    Args:
	        track: Mapping with 'msPlayed' plus either 'artistName'/'trackName'
	            or, when 'podcastName' is present and not None,
	            'podcastName'/'episodeName'.
	        artist_count: Mapping of artist (or podcast) name to number of
	            counted plays; updated in place with ``+=``, so unseen keys
	            must have a default (main() passes ``defaultdict(int)``).
	        artist_time: Mapping of artist (or podcast) name to summed
	            msPlayed; updated in place.
	        track_count: Mapping of track (or episode) name to number of
	            counted plays; updated in place.
	        track_time: Mapping of track (or episode) name to summed msPlayed;
	            updated in place.
	        listening_time: One-element list used as a mutable accumulator for
	            the total msPlayed of every record passed in.
	        min_ms_played: Minimum msPlayed for a record to enter the rankings.
	            Defaults to 30000, the cut-off that was previously hard-coded.

	    Raises:
	        KeyError: If the record lacks 'msPlayed' or the name keys it needs.
	    """
	    ms_played = track['msPlayed']

	    # The grand total includes short plays; only the rankings skip them.
	    listening_time[0] += ms_played
	    if ms_played < min_ms_played:
	        return

	    podcast = track.get('podcastName')
	    if podcast is None:
	        artist, song = track['artistName'], track['trackName']
	    else:
	        # Podcast episodes are ranked alongside music: show name as the
	        # "artist", episode name as the "track".
	        artist, song = podcast, track['episodeName']

	    artist_count[artist] += 1
	    artist_time[artist] += ms_played
	    track_count[song] += 1
	    track_time[song] += ms_played


	def main():
	    """Print top-10 rankings and total listening time for the 2024 window.

	    Reads 'streaming_history_combined.json' from the current directory. The
	    file is iterated two levels deep (a sequence of sequences of play
	    records); every record whose 'endTime' date prefix lies between
	    2024-01-01 and 2024-11-15 inclusive is passed to process_track(). The
	    function then prints the ten highest artists and tracks by play count
	    and by play time (hours), followed by the total listening time in
	    minutes.

	    Raises:
	        OSError: If the history file cannot be opened.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    artist_count = defaultdict(int)
	    artist_time = defaultdict(int)
	    track_count = defaultdict(int)
	    track_time = defaultdict(int)
	    listening_time = [0]  # one-element list so process_track can mutate it

	    # JSON text is UTF-8 by specification; without an explicit encoding,
	    # open() uses the locale's encoding, which can mangle or reject
	    # non-ASCII artist and track names on some platforms.
	    with open('streaming_history_combined.json', encoding='utf-8') as file:
	        data = json.load(file)

	    # The file handle is no longer needed once the JSON is parsed.
	    for item in data:
	        for track in item:
	            # 'YYYY-MM-DD' prefixes compare chronologically as plain strings.
	            if '2024-01-01' <= track['endTime'][:10] <= '2024-11-15':
	                process_track(track, artist_count, artist_time, track_count, track_time, listening_time)

	    def top_10(totals):
	        """Return the ten (name, value) pairs with the largest values."""
	        return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)[:10]

	    print("Top 10 artists by count:")
	    for artist, count in top_10(artist_count):
	        print(f"{artist}: {count}")

	    print("\nTop 10 artists by time:")
	    for artist, ms in top_10(artist_time):
	        print(f"{artist}: {ms / 1000 / 60 / 60:.2f} hours")

	    print("\nTop 10 tracks by count:")
	    for track, count in top_10(track_count):
	        print(f"{track}: {count}")

	    print("\nTop 10 tracks by time:")
	    for track, ms in top_10(track_time):
	        print(f"{track}: {ms / 1000 / 60 / 60:.2f} hours")

	    print(f"\nTotal listening time (minutes): {listening_time[0] / 1000 / 60:.2f}")

	# Entry-point guard: call main() only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	    main()