Skip to content

Instantly share code, notes, and snippets.

View fedden's full-sized avatar
🤖
Training deep audio models

Leon Fedden fedden

🤖
Training deep audio models
View GitHub Profile
import os
import librosa
directory = './path/to/my/audio/folder/'
# Walk the folder and decode every entry whose name ends in '.wav'.
for file in os.listdir(directory):
    # Skip anything that is not a WAV file.
    if not file.endswith('.wav'):
        continue
    file_path = os.path.join(directory, file)
    # librosa.load returns a pair; only the first element is kept here.
    audio_data, _ = librosa.load(file_path)
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
def wavenet_encode(file_path):
# Load the model weights.
checkpoint_path = './wavenet-ckpt/model.ckpt-200000'
# Load and downsample the audio.
neural_sample_rate = 16000
import librosa
sample_rate = 44100
mfcc_size = 13
# Load the audio
pcm_data, _ = librosa.load(file_path)
# Compute a vector of n * 13 mfccs
mfccs = librosa.feature.mfcc(pcm_data,
ls -lah ./audio_dataset/
...
-rw-rw-r-- 1 tollie tollie 3.8M Jun 28 2014 HAL9K - Long Sustained Note.wav
-rw-rw-r-- 1 tollie tollie 2.7M Jul 2 2014 HAL9K - Lost Soul.wav
-rw-rw-r-- 1 tollie tollie 7.5M Jun 29 2014 HAL9K - Low Long Tail.wav
-rw-rw-r-- 1 tollie tollie 3.8M Jun 28 2014 HAL9K - Low Short.wav
-rw-rw-r-- 1 tollie tollie 4.6M Jun 28 2014 HAL9K - Low Thump.wav
-rw-rw-r-- 1 tollie tollie 4.6M Jul 2 2014 HAL9K - Lute 1.wav
-rw-rw-r-- 1 tollie tollie 7.7M Jul 2 2014 HAL9K - Lute 2.wav
import numpy as np
# Stand-in for MFCC output: a random array of 10 rows, 13 values per row.
feature_sequence = np.random.random((10, 13))
# Column-wise mean (reduces over the 10 rows, leaving 13 values).
mean_features = feature_sequence.mean(axis=0)
# Column-wise standard deviation over the same axis.
stddev_features = feature_sequence.std(axis=0)
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
def get_pca(features):
    """Reduce `features` to two PCA components and rescale the result.

    A two-component PCA is fitted on `features` and then used to transform
    that same data. A default-configured MinMaxScaler is fitted on the
    projection, and the scaler's transform of the projection is returned.
    """
    reducer = PCA(n_components=2)
    reducer.fit(features)
    projected = reducer.transform(features)
    # The scaler is fitted on, and applied to, the very same projection.
    return MinMaxScaler().fit(projected).transform(projected)
@fedden
fedden / umap.py
Created November 20, 2017 12:03
import umap
from sklearn.preprocessing import MinMaxScaler
def get_scaled_umap_embeddings(features, neighbour, distance):
embedding = umap.UMAP(n_neighbors=neighbour,
min_dist=distance,
metric='correlation').fit_transform(features)
scaler = MinMaxScaler()
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
def get_scaled_tsne_embeddings(features, perplexity, iteration):
embedding = TSNE(n_components=2,
perplexity=perplexity,
n_iter=iteration).fit_transform(features)
scaler = MinMaxScaler()
scaler.fit(embedding)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.