Skip to content

Instantly share code, notes, and snippets.

View fedden's full-sized avatar
🤖
Training deep audio models

Leon Fedden fedden

🤖
Training deep audio models
View GitHub Profile
# Create a random population of genotypes: `population_size` rows, each with
# 2 elements drawn from the standard normal distribution.
population_size = 200
population = np.random.standard_normal((population_size, 2))
# Some objective measure of each genotype's performance or fitness. This is
# dependent on the environment or optimisation problem that you have, and
# `evaluate_fitness` is deliberately not defined in this snippet.
fitnesses = evaluate_fitness(population)
# Sort the list of fitnesses and create a probability distribution based on
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
def get_scaled_tsne_embeddings(features, perplexity, iteration):
embedding = TSNE(n_components=2,
perplexity=perplexity,
n_iter=iteration).fit_transform(features)
scaler = MinMaxScaler()
scaler.fit(embedding)
@fedden
fedden / umap.py
Created November 20, 2017 12:03
import umap
from sklearn.preprocessing import MinMaxScaler
def get_scaled_umap_embeddings(features, neighbour, distance):
embedding = umap.UMAP(n_neighbors=neighbour,
min_dist=distance,
metric='correlation').fit_transform(features)
scaler = MinMaxScaler()
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
def get_pca(features):
    """Project `features` onto two principal components and rescale them.

    A two-component PCA is fitted on `features` and then used to transform
    that same data. A default-constructed MinMaxScaler is fitted on the
    resulting projection, and its transform of the projection is returned.
    """
    reducer = PCA(n_components=2)
    reducer.fit(features)
    projected = reducer.transform(features)
    # Fit the scaler on the projection itself and return the rescaled result.
    return MinMaxScaler().fit_transform(projected)
import numpy as np
# Build a stand-in feature sequence: a (10, 13) array of random values,
# i.e. 10 rows with 13 MFCC-sized columns each.
feature_sequence = np.random.random(size=(10, 13))
# Column-wise mean over the 10 rows
mean_features = feature_sequence.mean(axis=0)
# Column-wise standard deviation over the 10 rows
stddev_features = feature_sequence.std(axis=0)
ls -lah ./audio_dataset/
...
-rw-rw-r-- 1 tollie tollie 3.8M Jun 28 2014 HAL9K - Long Sustained Note.wav
-rw-rw-r-- 1 tollie tollie 2.7M Jul 2 2014 HAL9K - Lost Soul.wav
-rw-rw-r-- 1 tollie tollie 7.5M Jun 29 2014 HAL9K - Low Long Tail.wav
-rw-rw-r-- 1 tollie tollie 3.8M Jun 28 2014 HAL9K - Low Short.wav
-rw-rw-r-- 1 tollie tollie 4.6M Jun 28 2014 HAL9K - Low Thump.wav
-rw-rw-r-- 1 tollie tollie 4.6M Jul 2 2014 HAL9K - Lute 1.wav
-rw-rw-r-- 1 tollie tollie 7.7M Jul 2 2014 HAL9K - Lute 2.wav
import librosa
sample_rate = 44100
mfcc_size = 13
# Load the audio
pcm_data, _ = librosa.load(file_path)
# Compute a vector of n * 13 mfccs
mfccs = librosa.feature.mfcc(pcm_data,
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
def wavenet_encode(file_path):
# Load the model weights.
checkpoint_path = './wavenet-ckpt/model.ckpt-200000'
# Load and downsample the audio.
neural_sample_rate = 16000