This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ConvModel(n_classes, sample_rate=16000, duration=4, | |
fft_size=_FFT_SIZE, hop_size=_HOP_SIZE, n_mels=_N_MEL_BINS): | |
n_samples = sample_rate * duration | |
# Accept raw audio data as input | |
x = Input(shape=(n_samples,), name='input', dtype='float32') | |
# Process into log-mel-spectrograms. (This is your custom layer!) | |
y = LogMelSpectrogram(sample_rate, fft_size, hop_size, n_mels)(x) | |
# Normalize data (on frequency axis) | |
y = BatchNormalization(axis=2)(y) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def power_to_db(S, amin=1e-16, top_db=80.0): | |
"""Convert a power-spectrogram (magnitude squared) to decibel (dB) units. | |
Computes the scaling ``10 * log10(S / max(S))`` in a numerically | |
stable way. | |
Based on: | |
https://librosa.github.io/librosa/generated/librosa.core.power_to_db.html | |
""" | |
def _tf_log10(x): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LogMelSpectrogram(tf.keras.layers.Layer): | |
"""Compute log-magnitude mel-scaled spectrograms.""" | |
def __init__(self, sample_rate, fft_size, hop_size, n_mels, | |
f_min=0.0, f_max=None, **kwargs): | |
super(LogMelSpectrogram, self).__init__(**kwargs) | |
self.sample_rate = sample_rate | |
self.fft_size = fft_size | |
self.hop_size = hop_size | |
self.n_mels = n_mels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import math | |
import os | |
from multiprocessing import Pool, cpu_count | |
import numpy as np | |
import pandas as pd | |
import tensorflow as tf | |
from tqdm import tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tensorflow as tf | |
AUTOTUNE = tf.data.experimental.AUTOTUNE | |
def _parse_batch(record_batch, sample_rate, duration): | |
n_samples = sample_rate * duration | |
# Create a description of the features |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
def _bytestring_feature(list_of_bytestrings):
    """Wrap byte strings in a ``tf.train.Feature``.

    Args:
        list_of_bytestrings: Values forwarded as ``value`` to
            ``tf.train.BytesList``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` field is built from
        the given values.
    """
    bytes_list = tf.train.BytesList(value=list_of_bytestrings)
    return tf.train.Feature(bytes_list=bytes_list)
def _int_feature(list_of_ints):  # int64
    """Wrap integers in a ``tf.train.Feature``.

    Args:
        list_of_ints: Values forwarded as ``value`` to
            ``tf.train.Int64List``.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` field is built from
        the given values.
    """
    int64_list = tf.train.Int64List(value=list_of_ints)
    return tf.train.Feature(int64_list=int64_list)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import tensorflow as tf | |
AUTOTUNE = tf.data.experimental.AUTOTUNE | |
def get_dataset(df):
    """Build a dataset of ``(file_path, label)`` pairs.

    Args:
        df: Object exposing ``file_path`` and ``label`` attributes
            (presumably a pandas DataFrame with those columns; confirm
            against the caller).

    Returns:
        A ``tf.data.Dataset`` that zips the dataset sliced from
        ``df.file_path`` with the dataset sliced from ``df.label``.
    """
    file_path_ds = tf.data.Dataset.from_tensor_slices(df.file_path)
    label_ds = tf.data.Dataset.from_tensor_slices(df.label)
    return tf.data.Dataset.zip((file_path_ds, label_ds))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import math | |
import os | |
import numpy as np | |
import pandas as pd | |
import tensorflow as tf | |
_BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |