Last active
April 21, 2019 16:28
-
-
Save anisayari/32aa27252986353eb540019a58ee1665 to your computer and use it in GitHub Desktop.
dataset from lastfm by tag
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import pylast
from tqdm import tqdm
# Last.fm API credentials passed to pylast.LastFMNetwork by
# build_dataset_by_tag. The values below are placeholders: replace them with
# your own before running, and do not commit real credentials.
API_KEY_lastfm = "key_here"
# NOTE(review): this placeholder starts with a space; make sure the real
# secret is pasted without surrounding whitespace.
API_SECRET_lastfm = " key_here"
username_lastfm = "username"
def build_dataset_by_tag(tag_list, output_file):
    """Build a CSV dataset of Last.fm top tracks for each tag in *tag_list*.

    For every tag, the top tracks (at most 1000 are requested) are fetched
    through pylast and one row per track is collected. Each row holds the tag
    (``style``), the artist, the track title, the track URL
    (``lastfm_music_url``) and the artist URL (``lastfm_artist_url``).
    A ``uuid`` column is then filled row by row with ``create_uuid_by_movie``
    and a ``videoID_youtube`` column is initialised to ``"missing"`` before
    the frame is written to *output_file* as a ``;``-separated CSV with a
    header and no index column.

    Args:
        tag_list: Iterable of Last.fm tag names (e.g. ``['country', 'rap']``).
        output_file: Path of the CSV file to write. Its parent directory is
            not created here.

    Note:
        ``create_uuid_by_movie`` is not defined in this block; it is expected
        to be available at module level.
    """
    print('[INFO] Bulding Dataset by Tag....')
    network_lastfm = pylast.LastFMNetwork(
        api_key=API_KEY_lastfm,
        api_secret=API_SECRET_lastfm,
        username=username_lastfm,
    )
    columns = [
        'uuid',
        'style',
        'artist',
        'title_music',
        'lastfm_music_url',
        'lastfm_artist_url',
    ]
    # Registers DataFrame.progress_apply, used below for the uuid column.
    tqdm.pandas()

    # Collect plain dicts and build the frame once: DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for style in tag_list:
        tracks = network_lastfm.get_tag(style).get_top_tracks(limit=1000)
        for track in tracks:
            item = track.item
            artist = item.get_artist()
            rows.append({
                'style': style,
                'artist': artist,
                'title_music': item.get_name(),
                # The track URL belongs to the music column and the artist
                # URL to the artist column (they were previously swapped).
                'lastfm_music_url': item.get_url(),
                'lastfm_artist_url': artist.get_url(),
            })

    # Keys missing from the row dicts ('uuid') become empty columns.
    df = pd.DataFrame(rows, columns=columns)
    # Skip when there are no rows: there is nothing to identify, and a
    # row-wise apply on an empty frame does not reliably yield a Series.
    if not df.empty:
        df['uuid'] = df.progress_apply(create_uuid_by_movie, axis=1)
    df['videoID_youtube'] = "missing"
    df.to_csv(output_file, sep=";", header=True, index=False)
    print('[INFO] Bulding Dataset by Tag DONE')
# Script driver: build the dataset for the 'country' and 'rap' tags and write
# it to data/tracks_list.csv. This runs whenever the module is executed or
# imported.
output_file = 'data/tracks_list.csv'
build_dataset_by_tag(['country', 'rap'], output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment