ketanhdoshi

ketanhdoshi / load_audio.py

Last active February 14, 2021 01:52

Load Audio file

	import librosa

	# Path of the recording to read (relative to the working directory).
	AUDIO_FILE = './audio.wav'
	# sr=None asks librosa to keep the file's own sampling rate rather than
	# resampling; load() hands back the signal first, then the rate.
	samples, sample_rate = librosa.load(path=AUDIO_FILE, sr=None)

ketanhdoshi / load_audio_scipy.py

Last active February 14, 2021 02:09

Load Audio file in scipy

	from scipy.io import wavfile
	# Read the WAV file at AUDIO_FILE (defined outside this snippet).
	# Note the order: wavfile.read() returns the rate first, then the data.
	sample_rate, samples = wavfile.read(filename=AUDIO_FILE)

ketanhdoshi / display_audio.py

Last active February 14, 2021 02:15

	import librosa.display
	import matplotlib.pyplot as plt

	# Plot the waveform with the x-axis converted to time using our sample rate.
	# matplotlib's plt.plot(samples) would draw the same signal, but with the
	# sample number on the x-axis instead of seconds.
	plt.figure(figsize=(14, 5))
	# librosa removed waveplot in favour of waveshow in newer releases; use
	# whichever this installation provides so the snippet runs on both. The
	# `or` keeps waveplot from being looked up when waveshow exists.
	draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
	draw_waveform(samples, sr=sample_rate)

ketanhdoshi / spectrogram.py

Last active February 14, 2021 02:36

	# Short-time Fourier transform of the signal, using librosa's default
	# window settings, then draw the resulting matrix as an image.
	sgram = librosa.stft(y=samples)
	librosa.display.specshow(data=sgram)

ketanhdoshi / mel_scale.py

Last active February 14, 2021 02:51

Mel Scale

	# Re-express the spectrogram on the mel scale instead of raw frequency.
	# magphase() splits the STFT into magnitude and phase; only the magnitude
	# is needed here, so the phase is discarded.
	sgram_mag, _ = librosa.magphase(D=sgram)
	mel_scale_sgram = librosa.feature.melspectrogram(sr=sample_rate, S=sgram_mag)
	librosa.display.specshow(data=mel_scale_sgram)

ketanhdoshi / mel_spectrogram.py

Last active November 17, 2021 03:56

Mel Spectrogram Decibel

	# numpy is needed for the np.min reference below; it is imported here
	# because no other snippet brings it into scope.
	import numpy as np

	# Use the decibel scale to get the final Mel Spectrogram.
	# ref=np.min expresses every value relative to the smallest amplitude in
	# the mel-scaled spectrogram.
	mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)
	# Label the axes in seconds and mel frequency, and add a dB colour scale.
	librosa.display.specshow(mel_sgram, sr=sample_rate, x_axis='time', y_axis='mel')
	plt.colorbar(format='%+2.0f dB')

ketanhdoshi / audio_array.py

Last active February 14, 2021 02:02

	# Summarise the loaded signal: its shape, its sampling rate and the
	# Python type holding it.
	print(
	    'Example shape ', samples.shape,
	    'Sample rate ', sample_rate,
	    'Data type', type(samples),
	)
	# Peek at twenty consecutive raw sample values from the middle of the clip.
	print(samples[slice(22400, 22420)])

ketanhdoshi / play_audio.py

Created February 14, 2021 01:54

Play Audio in Cell

	from IPython.display import Audio
	# Build an IPython audio object for AUDIO_FILE (defined outside this
	# snippet). As the last expression of a notebook cell, it is displayed
	# as an inline player.
	Audio(AUDIO_FILE)

ketanhdoshi / mel_spectrogram_shape.py

Created February 14, 2021 03:32

Mel Spectrogram Shape

	# The Mel Spectrogram is a 2D numpy array: show its type and dimensions.
	print(f"{type(mel_sgram)} {mel_sgram.shape}")
	# Example output: <class 'numpy.ndarray'> (128, 134)

ketanhdoshi / audio_util_load.py

Last active November 12, 2021 18:20

Audio Util

	import math, random
	import torch
	import torchaudio
	from torchaudio import transforms
	from IPython.display import Audio

	class AudioUtil():
	# ----------------------------
	# Load an audio file. Return the signal as a tensor and the sample rate
	# ----------------------------