Skip to content

Instantly share code, notes, and snippets.

@ketanhdoshi
ketanhdoshi / load_audio.py
Last active February 14, 2021 01:52
Load Audio file
import librosa
# Load the audio file.
# AUDIO_FILE is a path relative to the current working directory; the other
# snippets that reference AUDIO_FILE rely on this definition.
AUDIO_FILE = './audio.wav'
# sr=None asks librosa to keep the file's native sampling rate rather than
# resampling to its default rate. The call returns (samples, sample_rate).
samples, sample_rate = librosa.load(AUDIO_FILE, sr=None)
@ketanhdoshi
ketanhdoshi / load_audio_scipy.py
Last active February 14, 2021 02:09
Load Audio file in scipy
from scipy.io import wavfile
# Alternative loader based on SciPy. Note the return order is
# (sample_rate, samples) -- the reverse of librosa.load.
# NOTE(review): this snippet does not define AUDIO_FILE itself, and it rebinds
# `samples` / `sample_rate`. wavfile.read keeps the dtype stored in the file
# (presumably integer PCM for a typical WAV), while librosa routines such as
# stft expect floating-point audio -- confirm before mixing the two loaders.
sample_rate, samples = wavfile.read(AUDIO_FILE)
import librosa.display
import matplotlib.pyplot as plt
# Plot the waveform. The x-axis is expressed in seconds because the sample
# rate is passed as `sr`; calling matplotlib's plt.plot(samples) would draw
# the same curve, but with the sample number on the x-axis instead.
plt.figure(figsize=(14, 5))
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`. Prefer the
# new function and fall back to `waveplot` only on older releases that do
# not provide `waveshow` yet.
if hasattr(librosa.display, 'waveshow'):
    librosa.display.waveshow(samples, sr=sample_rate)
else:
    librosa.display.waveplot(samples, sr=sample_rate)

# Short-time Fourier transform of the signal; the result is complex-valued.
sgram = librosa.stft(samples)
# Display the magnitude explicitly. Passing the complex matrix makes specshow
# emit a warning and take the magnitude itself, so the picture is the same.
librosa.display.specshow(abs(sgram))
@ketanhdoshi
ketanhdoshi / mel_scale.py
Last active February 14, 2021 02:51
Mel Scale
# Use the Mel scale instead of raw frequency.
# magphase splits the complex STFT into magnitude and phase; only the
# magnitude is kept (the phase is discarded into `_`).
sgram_mag, _ = librosa.magphase(sgram)
# S= supplies an already-computed spectrogram, so melspectrogram only applies
# the Mel filter bank instead of recomputing the STFT from raw audio.
mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sample_rate)
# Plotted as-is: no axis arguments are given and no dB conversion is applied here.
librosa.display.specshow(mel_scale_sgram)
@ketanhdoshi
ketanhdoshi / mel_spectrogram.py
Last active November 17, 2021 03:56
Mel Spectrogram Decibel
# numpy is needed for `np.min` below; none of the snippets imports it, so
# without this line the cell fails with NameError.
import numpy as np

# Use the decibel scale to get the final Mel Spectrogram.
# ref=np.min makes the smallest amplitude the 0 dB reference, so every value
# in the result is >= 0 dB.
mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)
# Label the axes: time on x (derived from the sample rate), Mel frequency on y.
librosa.display.specshow(mel_sgram, sr=sample_rate, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
# Quick sanity check of the raw signal: shape, sample rate and container type,
# followed by a small window of sample values.
print('Example shape ', samples.shape, 'Sample rate ', sample_rate, 'Data type', type(samples))
print(samples[22400:22420])
@ketanhdoshi
ketanhdoshi / play_audio.py
Created February 14, 2021 01:54
Play Audio in Cell
from IPython.display import Audio
# Build an IPython audio object for the file at AUDIO_FILE. As the last
# expression of a notebook cell it is rendered as an inline player; in a plain
# script the object is created and then discarded.
# NOTE(review): AUDIO_FILE is not defined in this snippet -- it must already be
# set (the audio-loading snippet assigns it).
Audio(AUDIO_FILE)
@ketanhdoshi
ketanhdoshi / mel_spectrogram_shape.py
Created February 14, 2021 03:32
Mel Spectrogram Shape
# The Mel spectrogram is a 2D NumPy array; print its type and shape.
print(type(mel_sgram), mel_sgram.shape)
# Example output: <class 'numpy.ndarray'> (128, 134)
# (presumably (Mel bands, time frames) -- the second number depends on the clip length)
@ketanhdoshi
ketanhdoshi / audio_util_load.py
Last active November 12, 2021 18:20
Audio Util
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio
class AudioUtil():
# ----------------------------
# Load an audio file. Return the signal as a tensor and the sample rate
# ----------------------------