Skip to content

Instantly share code, notes, and snippets.

@MartinWeiss12
Last active March 4, 2024 02:35
Show Gist options
  • Save MartinWeiss12/28e7d007dec6e6f55d912901a972d424 to your computer and use it in GitHub Desktop.
Save MartinWeiss12/28e7d007dec6e6f55d912901a972d424 to your computer and use it in GitHub Desktop.
URI Matching
# Look up every row's 'Artist URI' in unique_artist_image_url_dict and store
# the result as 'Artist Image URL'. URIs missing from the mapping become NaN.
# The chain is parenthesised so it can span lines without a syntax error.
track_artist_album_df['Artist Image URL'] = (
    track_artist_album_df['Artist URI'].map(unique_artist_image_url_dict)
)

# Left join on 'Track URI': every row of cleaned_df is kept, and rows with no
# match in track_artist_album_df get NaN in the columns it contributes.
# NOTE(review): this assumes 'Track URI' is unique in track_artist_album_df;
# duplicates there would multiply rows (and later stream counts) - confirm.
spotify_data = pd.merge(cleaned_df, track_artist_album_df, on='Track URI', how='left')
def get_top_100(entity, spotify_data):
    """Return the (up to) 100 most-streamed entities in ``spotify_data``.

    Every row of ``spotify_data`` is counted as one stream of the URI found
    in its ``'<entity> URI'`` column.

    Args:
        entity: Prefix of the URI column to rank by; the column read is
            ``f'{entity} URI'``. ``'Track'`` additionally triggers URI
            normalisation, which needs the columns ``'Track'``, ``'Artist'``,
            ``'Album'`` and ``'Track URI'``.
        spotify_data: pandas DataFrame with one row per stream. It is never
            modified; all work happens on derived frames.

    Returns:
        A new DataFrame with one row per distinct URI (the first row seen for
        that URI), sorted by descending stream count and truncated to 100
        rows. Two columns are added: ``'Streams'`` (int) and ``'Rank'``
        (1-based position). Rows whose URI is missing are left out. Ties in
        ``'Streams'`` keep their first-seen order.

    Raises:
        KeyError: If a required column is absent.
    """
    uri_col = f'{entity} URI'
    data = spotify_data

    if entity == 'Track':
        data = _unify_track_uris(data)

    # A row without a URI cannot be counted. Left in place it would receive a
    # NaN stream count, and the integer cast below would raise on it.
    data = data.dropna(subset=[uri_col])

    # The number of rows carrying a URI is that URI's stream count.
    stream_counts = data[uri_col].value_counts()
    data = data.assign(Streams=data[uri_col].map(stream_counts))

    ranked = data.drop_duplicates(subset=[uri_col]).sort_values(
        by='Streams',
        ascending=False,
        kind='stable',  # deterministic order among equal stream counts
        ignore_index=True,
    )

    top_100 = ranked.head(100).copy()
    top_100['Rank'] = range(1, len(top_100) + 1)
    top_100['Streams'] = top_100['Streams'].astype(int)
    return top_100


def _unify_track_uris(data):
    """Give each (Track, Artist) pair one Track URI: its most frequent one.

    Returns a new frame (``data`` is left untouched) in which 'Feat' has been
    replaced by 'feat' in the 'Track' and 'Album' columns and rows are ordered
    by (Track, Artist) group. Rows whose 'Track' or 'Artist' is missing are
    dropped by the grouping, and groups without any URI are skipped.
    """
    # assign() builds a new frame, so the caller's columns are not overwritten.
    cleaned = data.assign(
        Track=data['Track'].str.replace('Feat', 'feat', regex=False),
        Album=data['Album'].str.replace('Feat', 'feat', regex=False),
    )

    unified = []
    for _, plays in cleaned.groupby(['Track', 'Artist']):
        uri_counts = plays['Track URI'].value_counts()
        if uri_counts.empty:
            # Every URI in this group is missing; idxmax() would raise.
            continue
        plays = plays.copy()  # own the data before writing into it
        plays['Track URI'] = uri_counts.idxmax()
        unified.append(plays)

    if not unified:
        # pd.concat refuses an empty list; hand back an empty frame instead.
        return cleaned.iloc[0:0]
    return pd.concat(unified, ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment