-
-
Save sdoshi579/dbabc940cd8af6a1d9e37d2ffe2cb655 to your computer and use it in GitHub Desktop.
import librosa | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
import os | |
import csv | |
# Preprocessing | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder, StandardScaler | |
#Keras | |
import keras | |
from keras import models | |
from keras import layers | |
# Generating a dataset: write one CSV row of summary audio features per track.
# Columns are the file name, six averaged spectral/temporal features,
# twenty averaged MFCCs and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

# librosa 0.7 renamed `feature.rmse` to `feature.rms`; use whichever this
# installation provides so the script runs on both old and new releases.
rms_feature = getattr(librosa.feature, 'rms', None)
if rms_feature is None:
    rms_feature = librosa.feature.rmse

# Open the output once for the whole run instead of re-opening it per track.
with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # Each genre is expected to have its own directory under ./genres/.
        for filename in os.listdir(f'./genres/{g}'):
            songname = f'./genres/{g}/{filename}'
            # Load at most the first 30 seconds, mixed down to mono.
            y, sr = librosa.load(songname, mono=True, duration=30)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = rms_feature(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            # NOTE(review): the header assumes 20 MFCC rows (mfcc1..mfcc20);
            # confirm that matches the default of librosa.feature.mfcc.
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # Every feature is reduced to its mean over the whole clip.
            feature_means = [
                np.mean(chroma_stft),
                np.mean(rmse),
                np.mean(spec_cent),
                np.mean(spec_bw),
                np.mean(rolloff),
                np.mean(zcr),
            ]
            feature_means.extend(np.mean(e) for e in mfcc)
            # Build the row as a list (not a space-joined string that is split
            # again) so a file name containing whitespace stays in one column.
            to_append = [filename]
            to_append.extend(f'{value}' for value in feature_means)
            to_append.append(g)
            writer.writerow(to_append)
# Reading the dataset from csv
data = pd.read_csv('data.csv')
data.head()
# Dropping unnecessary columns: the file name carries no signal for training
data = data.drop(['filename'], axis=1)
data.head()
# The last column holds the genre name; encode it as integer class ids
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)
# Every remaining column is a numeric feature
X = np.array(data.iloc[:, :-1], dtype=float)
# Splitting the dataset into train (80%) and test (20%) parts.
# The split happens BEFORE scaling so that no statistics of the held-out
# rows leak into the preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Normalizing: learn mean/variance from the training rows only, then apply
# the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep X standardized (with the train-only statistics) for any later use.
X = scaler.transform(X)
# Creating a model: a stack of fully connected layers that narrows from 256
# units down to a 10-way softmax output.
n_features = X_train.shape[1]
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(n_features,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split only.
history = model.fit(X_train, y_train, epochs=20, batch_size=128)

# Calculate accuracy on the held-out split
test_loss, test_acc = model.evaluate(X_test, y_test)
print('test_acc: ', test_acc)

# Predictions: one row of class probabilities per test sample; the index of
# the largest probability is the predicted class id.
predictions = model.predict(X_test)
np.argmax(predictions[0])
How can I load a .h5 model and use it to predict other audio files? Thanks
@martinhoang11 In .h5 you can save weights only, but if you want to save the model architecture then you need to save it as JSON.
How do you save and use the saved model? You can look here: https://keras.io/getting-started/faq/#how-can-i-save-a-keras-model.
And for predicting from any audio file, you just need to transform the audio file as we did for the dataset and pass the result into model.predict().
How can I load a .h5 model and use it to predict other audio files? Thanks
@martinhoang11 In .h5 you can save weights only, but if you want to save the model architecture then you need to save it as JSON.
How do you save and use the saved model? You can look here: https://keras.io/getting-started/faq/#how-can-i-save-a-keras-model.
And for predicting from any audio file, you just need to transform the audio file as we did for the dataset and pass the result into model.predict().
Thank you !
Hi, I have a question. How can I print the predicted result as a string like "male" or "female" after loading the model and running evaluate? Thanks
hello,
When I test on the genres dataset it works fine, but when I test on my own set of songs it does not work. I am using two types of songs; it reads one type but not the other. For example, print(y) prints 000000000000000000000000000000000000 ... but the 111111111111111111111 entries are missing. Would you please help solve this issue? Thanks
How do I access the data sets you reference in your blog? I tried both links from the blog but those links seem to be broken.
How can I load a .h5 model and use it to predict other audio files? Thanks