This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa

# Path of the audio clip used by the snippets in this file.
AUDIO_FILE = './audio.wav'

# Load the audio file. Passing sr=None asks librosa to keep the file's own
# sampling rate rather than resampling to its default rate.
# `samples` is the decoded signal, `sample_rate` the rate it was read at.
samples, sample_rate = librosa.load(AUDIO_FILE, sr=None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.io import wavfile

# Alternative loader: read the same file with SciPy. Note the return order is
# (rate, data), the reverse of librosa.load's (data, rate).
# NOTE(review): this rebinds `samples` and `sample_rate`. SciPy presumably
# returns the data in the file's stored dtype (often integers), while the
# librosa calls elsewhere in this file expect floating-point audio -- confirm
# which loader the downstream snippets are meant to follow.
sample_rate, samples = wavfile.read(AUDIO_FILE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa.display
import matplotlib.pyplot as plt

# Draw the waveform. The x-axis is converted to time using our sample rate;
# plain matplotlib `plt.plot(samples)` would output the same figure, but with
# the sample number on the x-axis instead of seconds.
plt.figure(figsize=(14, 5))

# `waveplot` was removed from newer librosa releases in favour of `waveshow`.
# Prefer `waveshow` when it exists and fall back to `waveplot` so the snippet
# runs on both old and new installations.
plot_waveform = (
    librosa.display.waveshow
    if hasattr(librosa.display, "waveshow")
    else librosa.display.waveplot
)
plot_waveform(samples, sr=sample_rate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Short-time Fourier transform of the signal, using librosa's default
# window and hop settings (no overrides are passed here).
sgram = librosa.stft(samples)

# Display the raw spectrogram; no axis labels or scaling are requested yet.
librosa.display.specshow(sgram)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the mel scale instead of raw frequency.
# Split the STFT into magnitude and phase; only the magnitude is needed, so
# the phase component is discarded.
sgram_mag, _ = librosa.magphase(sgram)

# Map the magnitude spectrogram onto mel-frequency bands.
mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sample_rate)
librosa.display.specshow(mel_scale_sgram)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# `np.min` is used below, but numpy is not imported by the earlier snippets,
# so bring it into scope here to avoid a NameError.
import numpy as np

# Use the decibel scale to get the final Mel Spectrogram.
# ref=np.min makes the conversion relative to the smallest value in the
# spectrogram.
mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)

# Plot with time on the x-axis and mel frequency on the y-axis, and attach a
# colour bar labelled in dB.
librosa.display.specshow(mel_sgram, sr=sample_rate, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarise the loaded signal: its shape, the sampling rate and the Python
# type of the container holding the samples.
print('Example shape ', samples.shape, 'Sample rate ', sample_rate, 'Data type', type(samples))

# Peek at a short run of 20 raw sample values from the middle of the clip.
print(samples[22400:22420])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from IPython.display import Audio

# Build an audio player for the clip. In a notebook the value of a cell's
# last expression is rendered, so this line is intentionally left as a bare
# expression rather than assigned to a name.
Audio(AUDIO_FILE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The spectrogram is a 2D numpy array.
print(type(mel_sgram), mel_sgram.shape)
# Example output recorded by the author for their clip:
# <class 'numpy.ndarray'> (128, 134)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard library
import math
import random

# Third-party
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio
class AudioUtil(): | |
# ---------------------------- | |
# Load an audio file. Return the signal as a tensor and the sample rate | |
# ---------------------------- |
OlderNewer