Last active
May 29, 2020 06:30
-
-
Save NP-chaonay/77d92e360762644005ab9791394c7bac to your computer and use it in GitHub Desktop.
Get specific metadata fields for n randomly selected songs from each mode group (major/minor) of the Million Song Dataset.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import objects
import random

import h5py
import numpy as np
import pandas as pd

# Prepare file
path = './Misc/msd_summary_file.h5'
# Number of songs to sample from each mode group.
n_songs = 100


def _build_group_frame(metadata_songs, analysis_songs, indices, mode):
    """Build the table of sampled songs for one mode group.

    Args:
        metadata_songs: The 'metadata/songs' dataset; each row is read for its
            'title' and 'artist_name' fields (bytes, decoded to str).
        analysis_songs: The 'analysis/songs' dataset; each row is read for its
            'mode_confidence' field.
        indices: Row positions of the songs to include.
        mode: Value stored in the 'Mode' column for every row.

    Returns:
        A DataFrame with columns Name, Artist, Mode, Mode_Confidence, one row
        per entry of ``indices`` and in the same order.
    """
    columns = ['Name', 'Artist', 'Mode', 'Mode_Confidence']
    rows = []
    for i in indices:
        # Read the metadata row once and take both fields from it.
        meta_row = metadata_songs[i]
        rows.append({
            'Name': meta_row['title'].decode(),
            'Artist': meta_row['artist_name'].decode(),
            'Mode': mode,
            'Mode_Confidence': analysis_songs[i]['mode_confidence'],
        })
    return pd.DataFrame(rows, columns=columns)


# The context manager closes the HDF5 file even if reading fails part-way.
with h5py.File(path, 'r') as hd5_file:
    analysis_songs = hd5_file['analysis']['songs']
    metadata_songs = hd5_file['metadata']['songs']

    # Shuffle and get n randomly-selected songs index
    # (on each mode group (Major-Minor)).
    # The 'mode' field is loaded once and reused for both groups.
    # NOTE(review): the Echo Nest analysis convention appears to be
    # 1 = major, 0 = minor, i.e. the opposite of the maj_/min_ naming used
    # here -- confirm. The 'Mode' column holds the raw dataset value either way.
    mode_values = analysis_songs['mode']
    maj_index = np.where(mode_values == 0)[0]
    min_index = np.where(mode_values == 1)[0]
    maj_index = np.random.permutation(maj_index)
    min_index = np.random.permutation(min_index)

    # A prefix of a uniform random permutation is already a uniform random
    # sample, so no random window offset is needed. Slicing also copes with
    # groups that hold fewer than n_songs entries (all of them are returned).
    selected_maj_index = maj_index[:n_songs]
    selected_min_index = min_index[:n_songs]

    # Retrieve information from selected index
    maj_df = _build_group_frame(
        metadata_songs, analysis_songs, selected_maj_index, 0)
    min_df = _build_group_frame(
        metadata_songs, analysis_songs, selected_min_index, 1)

df = pd.concat([maj_df, min_df])
df = df.set_index('Name')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment