anisayari · April 21, 2019 18:18
diff --git a/extract_features_from_songs.py b/extract_features_from_songs.py
 import librosa
 import numpy as np
 import pandas as pd
 from tqdm import tqdm

 def extract_features_from_a_song(x,sr):
     """Return one averaged value per spectral feature of an audio signal.

     Each librosa feature array is collapsed with ``np.mean`` over all of its
     elements (no axis is given), so every feature becomes a single scalar.

     Args:
         x: Audio signal, as returned by ``librosa.load``.
         sr: Sampling rate of ``x``, as returned by ``librosa.load``.

     Returns:
         dict with the keys ``'zcr'``, ``'chroma_stft'``, ``'mfcc'``,
         ``'spec_cent'``, ``'spec_bw'`` and ``'spec_rolloff'``.
     """
     # The signal is passed as ``y=`` because recent librosa releases accept it
     # by keyword only for most feature functions; the keyword form also works
     # on older releases.
     return {
         'zcr': np.mean(librosa.feature.zero_crossing_rate(y=x)),
         'chroma_stft': np.mean(librosa.feature.chroma_stft(y=x, sr=sr)),
         'mfcc': np.mean(librosa.feature.mfcc(y=x, sr=sr)),
         'spec_cent': np.mean(librosa.feature.spectral_centroid(y=x, sr=sr)),
         'spec_bw': np.mean(librosa.feature.spectral_bandwidth(y=x, sr=sr)),
         # Previously computed with spectral_centroid, which made this value a
         # duplicate of 'spec_cent'.
         'spec_rolloff': np.mean(librosa.feature.spectral_rolloff(y=x, sr=sr)),
     }

 def get_data_from_sound(row):
     """Fill one song record with its averaged audio features.

     The audio file is looked up at ``data/music/<style>/<uuid>.mp3``. When the
     file does not exist, or feature extraction raises ``OverflowError``, every
     feature entry is set to 0.

     Args:
         row: Record providing ``'style'`` and ``'uuid'`` entries. It is
             modified in place.

     Returns:
         The same ``row`` object with the entries ``'spec_cent'``,
         ``'spec_rolloff'``, ``'spec_bw'``, ``'mfcc'``, ``'zcr'`` and
         ``'chroma_stft'`` set.
     """
     # Local import: ``os`` is needed for the file check but is not part of the
     # module's visible import block.
     import os

     # Order matches the original assignment order of the feature entries.
     feature_names = ('spec_cent', 'spec_rolloff', 'spec_bw',
                      'mfcc', 'zcr', 'chroma_stft')
     dict_features = dict.fromkeys(feature_names, 0)

     audio_path = 'data/music/{}/{}.mp3'.format(row['style'],str(row['uuid']))
     if os.path.isfile(audio_path):
         x, sr = librosa.load(audio_path)
         uuid = row['uuid']
         try:
             dict_features = extract_features_from_a_song(x,sr)
         except OverflowError:
             # Keep the zero defaults for this song and carry on with the rest.
             print('Cannot run features extract for',uuid)
         print('{} DONE'.format(uuid))

     for name in feature_names:
         row[name] = dict_features[name]
     return row
 
 # Register tqdm's ``progress_apply`` on pandas objects before it is used.
 tqdm.pandas()
 # Load the ';'-separated song listing; the first line holds the column names.
 df = pd.read_csv(
     output_file,
     sep=";",
     header=0,
 )
 # Process every row (axis=1) through get_data_from_sound with a progress bar.
 df = df.progress_apply(
     get_data_from_sound,
     axis=1,
 )
	import librosa
	import numpy as np
	import pandas as pd
	from tqdm import tqdm

	def extract_features_from_a_song(x,sr):
	    """Return one averaged value per spectral feature of an audio signal.

	    Each librosa feature array is collapsed with ``np.mean`` over all of its
	    elements (no axis is given), so every feature becomes a single scalar.

	    Args:
	        x: Audio signal, as returned by ``librosa.load``.
	        sr: Sampling rate of ``x``, as returned by ``librosa.load``.

	    Returns:
	        dict with the keys ``'zcr'``, ``'chroma_stft'``, ``'mfcc'``,
	        ``'spec_cent'``, ``'spec_bw'`` and ``'spec_rolloff'``.
	    """
	    # The signal is passed as ``y=`` because recent librosa releases accept it
	    # by keyword only for most feature functions; the keyword form also works
	    # on older releases.
	    return {
	        'zcr': np.mean(librosa.feature.zero_crossing_rate(y=x)),
	        'chroma_stft': np.mean(librosa.feature.chroma_stft(y=x, sr=sr)),
	        'mfcc': np.mean(librosa.feature.mfcc(y=x, sr=sr)),
	        'spec_cent': np.mean(librosa.feature.spectral_centroid(y=x, sr=sr)),
	        'spec_bw': np.mean(librosa.feature.spectral_bandwidth(y=x, sr=sr)),
	        # Previously computed with spectral_centroid, which made this value a
	        # duplicate of 'spec_cent'.
	        'spec_rolloff': np.mean(librosa.feature.spectral_rolloff(y=x, sr=sr)),
	    }

	def get_data_from_sound(row):
	    """Fill one song record with its averaged audio features.

	    The audio file is looked up at ``data/music/<style>/<uuid>.mp3``. When the
	    file does not exist, or feature extraction raises ``OverflowError``, every
	    feature entry is set to 0.

	    Args:
	        row: Record providing ``'style'`` and ``'uuid'`` entries. It is
	            modified in place.

	    Returns:
	        The same ``row`` object with the entries ``'spec_cent'``,
	        ``'spec_rolloff'``, ``'spec_bw'``, ``'mfcc'``, ``'zcr'`` and
	        ``'chroma_stft'`` set.
	    """
	    # Local import: ``os`` is needed for the file check but is not part of the
	    # module's visible import block.
	    import os

	    # Order matches the original assignment order of the feature entries.
	    feature_names = ('spec_cent', 'spec_rolloff', 'spec_bw',
	                     'mfcc', 'zcr', 'chroma_stft')
	    dict_features = dict.fromkeys(feature_names, 0)

	    audio_path = 'data/music/{}/{}.mp3'.format(row['style'],str(row['uuid']))
	    if os.path.isfile(audio_path):
	        x, sr = librosa.load(audio_path)
	        uuid = row['uuid']
	        try:
	            dict_features = extract_features_from_a_song(x,sr)
	        except OverflowError:
	            # Keep the zero defaults for this song and carry on with the rest.
	            print('Cannot run features extract for',uuid)
	        print('{} DONE'.format(uuid))

	    for name in feature_names:
	        row[name] = dict_features[name]
	    return row

	# Register tqdm's ``progress_apply`` on pandas objects before it is used.
	tqdm.pandas()
	# Load the ';'-separated song listing; the first line holds the column names.
	df = pd.read_csv(
	    output_file,
	    sep=";",
	    header=0,
	)
	# Process every row (axis=1) through get_data_from_sound with a progress bar.
	df = df.progress_apply(
	    get_data_from_sound,
	    axis=1,
	)