Skip to content

Instantly share code, notes, and snippets.

View dschwertfeger's full-sized avatar

David Schwertfeger dschwertfeger

View GitHub Profile
@dschwertfeger
dschwertfeger / model.py
Created March 21, 2020 05:29
An example of a neural network that uses a custom LogMelSpectrogram layer
def ConvModel(n_classes, sample_rate=16000, duration=4,
fft_size=_FFT_SIZE, hop_size=_HOP_SIZE, n_mels=_N_MEL_BINS):
n_samples = sample_rate * duration
# Accept raw audio data as input
x = Input(shape=(n_samples,), name='input', dtype='float32')
# Process into log-mel-spectrograms. (This is your custom layer!)
y = LogMelSpectrogram(sample_rate, fft_size, hop_size, n_mels)(x)
# Normalize data (on frequency axis)
y = BatchNormalization(axis=2)(y)
@dschwertfeger
dschwertfeger / power_to_db.py
Created March 19, 2020 05:05
Convert a power-spectrogram to decibel units in TensorFlow
def power_to_db(S, amin=1e-16, top_db=80.0):
"""Convert a power-spectrogram (magnitude squared) to decibel (dB) units.
Computes the scaling ``10 * log10(S / max(S))`` in a numerically
stable way.
Based on:
https://librosa.github.io/librosa/generated/librosa.core.power_to_db.html
"""
def _tf_log10(x):
@dschwertfeger
dschwertfeger / mel_spectrogram_layer.py
Created March 18, 2020 05:24
A custom Keras layer to transform raw audio to log-mel-spectrograms
class LogMelSpectrogram(tf.keras.layers.Layer):
"""Compute log-magnitude mel-scaled spectrograms."""
def __init__(self, sample_rate, fft_size, hop_size, n_mels,
f_min=0.0, f_max=None, **kwargs):
super(LogMelSpectrogram, self).__init__(**kwargs)
self.sample_rate = sample_rate
self.fft_size = fft_size
self.hop_size = hop_size
self.n_mels = n_mels
@dschwertfeger
dschwertfeger / convert.py
Created January 7, 2020 14:29
Script to convert WAV files to TFRecords
import argparse
import math
import os
from multiprocessing import Pool, cpu_count
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
@dschwertfeger
dschwertfeger / load_tfrecords.py
Last active January 10, 2022 10:17
A data pipeline using TFRecords
import os
import tensorflow as tf
AUTOTUNE = tf.data.experimental.AUTOTUNE
def _parse_batch(record_batch, sample_rate, duration):
n_samples = sample_rate * duration
# Create a description of the features
import tensorflow as tf
def _bytestring_feature(list_of_bytestrings):
    """Wrap a list of byte strings in a ``tf.train.Feature``.

    The values are placed in a ``tf.train.BytesList``, which is then
    passed as the ``bytes_list`` field of the returned feature.
    """
    bytes_list = tf.train.BytesList(value=list_of_bytestrings)
    return tf.train.Feature(bytes_list=bytes_list)
def _int_feature(list_of_ints):
    """Wrap a list of integers in a ``tf.train.Feature``.

    The values are placed in a ``tf.train.Int64List``, which is then
    passed as the ``int64_list`` field of the returned feature.
    """
    int64_list = tf.train.Int64List(value=list_of_ints)
    return tf.train.Feature(int64_list=int64_list)
import pandas as pd
import tensorflow as tf
AUTOTUNE = tf.data.experimental.AUTOTUNE
def get_dataset(df):
    """Build a zipped dataset of ``(file_path, label)`` pairs from *df*.

    Reads the ``file_path`` and ``label`` attributes of *df*, turns each
    into its own ``tf.data.Dataset`` via ``from_tensor_slices``, and
    returns the two datasets zipped together.
    """
    from_slices = tf.data.Dataset.from_tensor_slices
    paths = from_slices(df.file_path)
    labels = from_slices(df.label)
    return tf.data.Dataset.zip((paths, labels))
@dschwertfeger
dschwertfeger / convert.py
Last active September 13, 2023 20:23
Convert WAV files to TFRecord format
import argparse
import math
import os
import numpy as np
import pandas as pd
import tensorflow as tf
# Absolute path of the directory that contains this script.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))