Skip to content

Instantly share code, notes, and snippets.

@casperkaae
Last active April 2, 2017 22:32
Show Gist options
  • Save casperkaae/77a507ddf5a83b20a0dc6bf08c9d01b8 to your computer and use it in GitHub Desktop.
Save casperkaae/77a507ddf5a83b20a0dc6bf08c9d01b8 to your computer and use it in GitHub Desktop.
sound fun
# Packages we're using
import numpy as np
import copy
import soundfile as sf
def overlap(X, window_size, window_step):
"""
Create an overlapped version of X
Parameters
----------
X : ndarray, shape=(n_samples,)
Input signal to window and overlap
window_size : int
Size of windows to take
window_step : int
Step size between windows
Returns
-------
X_strided : shape=(n_windows, window_size)
2D array of overlapped X
"""
if window_size % 2 != 0:
raise ValueError("Window size must be even!")
# Make sure there are an even number of windows before stridetricks
append = np.zeros((window_size - len(X) % window_size))
X = np.hstack((X, append))
ws = window_size
ss = window_step
a = X
valid = int(len(a) - ws)
nw = int((valid) // ss)
out = np.ndarray((nw,ws),dtype = a.dtype)
for i in range(nw):
# "slide" the window along the samples
start = int(i * ss)
stop = int(start + ws)
out[i] = a[start : stop]
return out
def stft(X, window_size=128, fftsize=256, step=65, mean_normalize=True, real=False,
compute_onesided=True, windowtype='hamming'):
"""
Compute STFT for 1D real valued input X
"""
if real:
local_fft = np.fft.rfft
cut = -1
else:
local_fft = np.fft.fft
cut = None
if compute_onesided:
cut = fftsize // 2
if mean_normalize:
X -= X.mean()
X = overlap(X, window_size, step)
if windowtype == 'hamming':
win = np.hamming(window_size)
elif windowtype == 'bartlett':
win = np.bartlett(window_size)
elif windowtype == 'blackman':
win = np.blackman(window_size)
elif windowtype == 'kaiser':
win = np.kaiser(window_size)
#win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
else:
raise ValueError()
X = X * win[None]
X = local_fft(X,fftsize)[:, :cut]
return X
def spectrogram(d,log = True, thresh= 5, window_size = 256, fft_size = 512, step_size = 64, windowtype='hamming'):
"""
creates a spectrogram
log: take the log of the spectrgram
thresh: threshold minimum power for log spectrogram
"""
F = stft(d, window_size= window_size, fftsize=fft_size, step=step_size, real=False,
compute_onesided=True, windowtype=windowtype)
specgram = np.abs(F)
phase = np.angle(F)
if log == True:
specgram /= specgram.max() # volume normalize to max 1
specgram = np.log10(specgram) # take log
specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
else:
specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold
return specgram, phase
def spectrogram_to_mel(spectrogram, mel_filter):
mel_spec = spectrogram.dot(mel_filter)
return mel_spec
def hz2mel(hz):
"""Convert a value in Hertz to Mels
:param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
:returns: a value in Mels. If an array was passed in, an identical sized array is returned.
"""
return 2595 * np.log10(1+hz/700.)
def mel2hz(mel):
"""Convert a value in Mels to Hertz
:param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
:returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
"""
return 700*(10**(mel/2595.0)-1)
def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
"""Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
:param nfilt: the number of filters in the filterbank, default 20.
:param nfft: the FFT size. Default is 512.
:param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
:param lowfreq: lowest band edge of mel filters, default 0 Hz
:param highfreq: highest band edge of mel filters, default samplerate/2
:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
"""
highfreq= highfreq or samplerate/2
assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
# compute points evenly spaced in mels
lowmel = hz2mel(lowfreq)
highmel = hz2mel(highfreq)
melpoints = np.linspace(lowmel,highmel,nfilt+2)
# our points are in Hz, but we use fft bins, so we have to convert
# from Hz to fft bin number
bin = np.floor((nfft+1)*mel2hz(melpoints)/samplerate)
fbank = np.zeros([nfilt,nfft//2])
for j in range(0,nfilt):
for i in range(int(bin[j]), int(bin[j+1])):
fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
for i in range(int(bin[j+1]), int(bin[j+2])):
fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
return fbank
def create_mel_filter(fft_size, n_freq_components = 64, start_freq = 300, end_freq = 8000, samplerate=44100):
"""
Creates a filter to convolve with the spectrogram to get out mels
"""
mel_inversion_filter = get_filterbanks(nfilt=n_freq_components,
nfft=fft_size, samplerate=samplerate,
lowfreq=start_freq, highfreq=end_freq)
# Normalize filter
mel_filter = mel_inversion_filter / mel_inversion_filter.sum(axis=1,keepdims=True)
return mel_filter.T, mel_inversion_filter
def get_spectrograms(x, window_size, step_size, spec_thresh=6, n_mel_freq_components = 64, start_freq = 0, end_freq=None, samplerate=22050):
fft_size = window_size
n = x.shape[0]
x = x[:n-n%fft_size]
assert x.shape[0] % fft_size == 0
assert fft_size % step_size == 0
cut = int((fft_size//step_size)/2)
windowtype = 'hamming'
mel_filter, mel_inversion_filter = create_mel_filter(fft_size = fft_size,
n_freq_components = n_mel_freq_components,
start_freq = start_freq,
end_freq = end_freq,
samplerate = samplerate)
assert not np.isnan(mel_filter.sum()), 'nans in mel filter, try lowering n_mel_freq_components'
spec_linear,phase = spectrogram(x.astype('float64'), window_size=window_size, fft_size = fft_size,
step_size = step_size, log = True, thresh = spec_thresh,windowtype=windowtype)
spec_linear = spec_linear[:-cut]
phase = spec_linear[:-cut]
spec_mel = spectrogram_to_mel(spec_linear, mel_filter)
return x, spec_linear, phase, spec_mel
#SETTINGS
n_mel_freq_components = 80
start_freq = 200
spec_thresh = 6
step_size = 64
window_size = 1024
samplerate = 22050
######## GET SPECTOGRAM FOR A SINGLE FILE #########
### Discard phase
xshortened, spec_linear, phase, spec_mel = get_spectrograms(data, window_size, step_size, spec_thresh=spec_thresh, n_mel_freq_components = n_mel_freq_components, start_freq = start_freq, end_freq=None, samplerate=samplerate)
####### ######
recovered_audio_orig = griffin_lim(spec_linear, fft_size = fft_size,
step_size = step_size, log = True, n_iter = 50, pre_raise=1.1,windowtype=windowtype)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment