Skip to content

Instantly share code, notes, and snippets.

@NP-chaonay
Last active May 29, 2020 06:30
Show Gist options
  • Save NP-chaonay/77d92e360762644005ab9791394c7bac to your computer and use it in GitHub Desktop.
Save NP-chaonay/77d92e360762644005ab9791394c7bac to your computer and use it in GitHub Desktop.
Get selected metadata fields for n randomly selected songs from each mode group (major/minor) of the Million Song Dataset.
# Import objects
import numpy as np
import pandas as pd
import h5py,random
# Prepare file
path = './Misc/msd_summary_file.h5'
# Number of songs drawn from each mode group.
n_songs = 100


def _pick_random_rows(mode_column, mode_value, n):
    """Return ``n`` distinct row positions, in random order, whose mode equals ``mode_value``.

    Raises:
        ValueError: if ``n`` is negative or fewer than ``n`` rows match.
    """
    if n < 0:
        raise ValueError(f'n must be non-negative, got {n}')
    candidates = np.flatnonzero(np.asarray(mode_column) == mode_value)
    if candidates.size < n:
        raise ValueError(
            f'only {candidates.size} songs have mode {mode_value}; '
            f'cannot select {n}'
        )
    # The first n entries of a uniform shuffle are a uniform sample without
    # replacement, so no extra random window offset is needed.
    return np.random.permutation(candidates)[:n]


def sample_mode_group(hd5_file, mode_value, n=100):
    """Build a DataFrame of ``n`` randomly selected songs with the given mode.

    Args:
        hd5_file: An open ``h5py.File`` (or any mapping with the same layout)
            providing ``['analysis']['songs']`` with ``mode`` and
            ``mode_confidence`` fields and ``['metadata']['songs']`` with
            ``title`` and ``artist_name`` byte-string fields.
        mode_value: Value of the ``mode`` field that selects the group.
        n: Number of songs to draw (without replacement).

    Returns:
        A DataFrame with columns ``Name``, ``Artist``, ``Mode`` and
        ``Mode_Confidence``, one row per selected song, in random order.

    Raises:
        ValueError: if ``n`` is negative or the group has fewer than ``n`` songs.
    """
    analysis_songs = hd5_file['analysis']['songs']
    metadata_songs = hd5_file['metadata']['songs']
    rows = _pick_random_rows(analysis_songs['mode'], mode_value, n)

    names = []
    artists = []
    confidences = []
    for row in rows:
        # Fetch each metadata row once and take both fields from it.
        meta_row = metadata_songs[row]
        names.append(meta_row['title'].decode())
        artists.append(meta_row['artist_name'].decode())
        confidences.append(analysis_songs[row]['mode_confidence'])

    group = pd.DataFrame({'Name': names, 'Artist': artists})
    group['Mode'] = mode_value
    group['Mode_Confidence'] = confidences
    return group


# Run the extraction only when executed as a script or notebook cell, so that
# importing this module does not touch the file system.
if __name__ == '__main__':
    # Shuffle and get n randomly-selected songs (on each mode group (Major-Minor)).
    # NOTE(review): the groups follow the original naming (major = mode 0,
    # minor = mode 1). The Echo Nest analyzer convention appears to be the
    # opposite (1 = major, 0 = minor) - confirm. The 'Mode' column stores the
    # raw dataset value either way.
    with h5py.File(path, 'r') as hd5_file:
        maj_df = sample_mode_group(hd5_file, 0, n_songs)
        min_df = sample_mode_group(hd5_file, 1, n_songs)
    # Combine both groups and index the result by song name.
    df = pd.concat([maj_df, min_df])
    df = df.set_index('Name')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment