casperkaae · April 2, 2017 22:03
diff --git a/gistfile1.txt b/gistfile1.txt

 # Packages we're using
 import numpy as np
 import matplotlib.pyplot as plt
 import copy
 from scipy.io import wavfile
 import soundfile as sf

 from utils.snr import residual_snrdb


 def overlap(X, window_size, window_step):
    """
    Create an overlapped version of X
    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap
    window_size : int
        Size of windows to take
    window_step : int
        Step size between windows
    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X
    """
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    # Make sure there are an even number of windows before stridetricks
    append = np.zeros((window_size - len(X) % window_size))
    X = np.hstack((X, append))

    ws = window_size
    ss = window_step
    a = X

    valid = int(len(a) - ws)
    nw = int((valid) // ss)
    out = np.ndarray((nw,ws),dtype = a.dtype)

    for i in range(nw):
        # "slide" the window along the samples
        start = int(i * ss)
        stop = int(start + ws)
        out[i] = a[start : stop]

    return out


 def stft(X, window_size=128, fftsize=256, step=65, mean_normalize=True, real=False,
         compute_onesided=True, windowtype='hamming'):
    """
    Compute STFT for 1D real valued input X
    """
    if real:
        local_fft = np.fft.rfft
        cut = -1
    else:
        local_fft = np.fft.fft
        cut = None
    if compute_onesided:
        cut = fftsize // 2
    if mean_normalize:
        X -= X.mean()

    X = overlap(X, window_size, step)
    
    if windowtype == 'hamming':
        win = np.hamming(window_size)
    elif windowtype == 'bartlett':
        win = np.bartlett(window_size)
    elif windowtype == 'blackman':
        win = np.blackman(window_size)
    elif windowtype == 'kaiser':
        win = np.kaiser(window_size)
        #win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
    else:
        raise ValueError()
    X = X * win[None]
    X = local_fft(X,fftsize)[:, :cut]
    return X

 def spectrogram(d,log = True, thresh= 5, window_size = 256, fft_size = 512, step_size = 64, windowtype='hamming'):
    """
    creates a spectrogram
    log: take the log of the spectrgram
    thresh: threshold minimum power for log spectrogram
    """
    F = stft(d, window_size= window_size, fftsize=fft_size, step=step_size, real=False,
        compute_onesided=True, windowtype=windowtype)
    specgram = np.abs(F)
    phase = np.angle(F) 
    if log == True:
        specgram /= specgram.max() # volume normalize to max 1
        specgram = np.log10(specgram) # take log
        specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
    else:
        specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold
    
    return specgram, phase

 # Also mostly modified or taken from https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
 def griffin_lim(X_s, log = True, fft_size = 512, step_size = 512/4, n_iter = 10, pre_raise=None, windowtype='hamming'):
        
    if log == True:
        X_s = np.power(10, X_s)
    if pre_raise:
        X_s = X_s**pre_raise


    X_s = np.concatenate([X_s, X_s[:, ::-1]], axis=1)
    X_t = iterate_invert_spectrogram(X_s, fft_size, step_size, n_iter=n_iter,windowtype=windowtype)
    return X_t

 def iterate_invert_spectrogram(X_s, fftsize, step, n_iter=10, verbose=False, windowtype='hamming'):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    reg = np.max(X_s) / 1E8
    X_best = copy.deepcopy(X_s)
    for i in range(n_iter):
        if verbose:
            print("Runnning iter %i" % i)
        if i == 0:
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=True,windowtype=windowtype)
        else:
            # Calculate offset was False in the MATLAB version
            # but in mine it massively improves the result
            # Possible bug in my impl?
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=False,windowtype=windowtype)
        est = stft(X_t, window_size=window_size, fftsize=fftsize, step=step, compute_onesided=False)
        phase = est / np.maximum(reg, np.abs(est))
        X_best = X_s * phase[:len(X_s)]
    X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                             set_zero_phase=False)
    return np.real(X_t)

 def invert_spectrogram(X_s, step, calculate_offset=True, set_zero_phase=True, windowtype='hamming'):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    size = int(X_s.shape[1] // 2)
    wave = np.zeros(int(X_s.shape[0] * step + size))
    # Getting overflow warnings with 32 bit...
    wave = wave.astype('float64')
    total_windowing_sum = np.zeros(int(X_s.shape[0] * step + size))
    if windowtype == 'hamming':
        win = np.hamming(size)
    elif windowtype == 'bartlett':
        win = np.bartlett(size)
    elif windowtype == 'blackman':
        win = np.blackman(size)
    elif windowtype == 'kaiser':
        win = np.kaiser(size)
        #win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
    else:
        raise ValueError()
        
    est_start = int(size // 2) - 1
    est_end = est_start + size
    for i in range(X_s.shape[0]):
        wave_start = int(step * i)
        wave_end = int(wave_start + size)
        if set_zero_phase:
            spectral_slice = X_s[i].real + 0j
        else:
            # already complex
            spectral_slice = X_s[i]

        # Don't need fftshift due to different impl.
        wave_est = np.real(np.fft.ifft(spectral_slice))[::-1]
        if calculate_offset and i > 0:
            offset_size = int(size - step)
            if offset_size <= 0:
                print("WARNING: Large step size >50\% detected! "
                      "This code works best with high overlap - try "
                      "with 75% or greater")
                offset_size = step
            offset = xcorr_offset(wave[wave_start:wave_start + offset_size],
                                  wave_est[est_start:est_start + offset_size])
        else:
            offset = 0
        wave[wave_start:wave_end] += win * wave_est[
            est_start - offset:est_end - offset]
        total_windowing_sum[wave_start:wave_end] += win
    wave = np.real(wave) / (total_windowing_sum + 1E-6)
    return wave

 def xcorr_offset(x1, x2):
    """
    Under MSR-LA License
    Based on MATLAB implementation from Spectrogram Inversion Toolbox
    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.
    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.
    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    x1 = x1 - x1.mean()
    x2 = x2 - x2.mean()
    frame_size = len(x2)
    half = frame_size // 2
    corrs = np.convolve(x1.astype('float32'), x2[::-1].astype('float32'))
    corrs[:half] = -1E30
    corrs[-half:] = -1E30
    offset = corrs.argmax() - len(x1)
    return offset

 def spectrogram_to_mel(spectrogram, mel_filter):
    mel_spec = spectrogram.dot(mel_filter)
    return mel_spec

 def mel_to_spectrogram(mel_spec, mel_inversion_filter, spec_thresh):
    """
    takes in an mel spectrogram and returns a normal spectrogram for inversion 
    """
    mel_spec = mel_spec+spec_thresh
    uncompressed_spec = mel_spec.dot(mel_inversion_filter)
    return uncompressed_spec-spec_thresh
 # From https://github.com/jameslyons/python_speech_features

 def hz2mel(hz):
    """Convert a value in Hertz to Mels
    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
    """
    return 2595 * np.log10(1+hz/700.)
    
 def mel2hz(mel):
    """Convert a value in Mels to Hertz
    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    """
    return 700*(10**(mel/2595.0)-1)

 def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
    """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
    to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    """
    highfreq= highfreq or samplerate/2
    assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
    
    # compute points evenly spaced in mels
    lowmel = hz2mel(lowfreq)
    highmel = hz2mel(highfreq)
    melpoints = np.linspace(lowmel,highmel,nfilt+2)
    # our points are in Hz, but we use fft bins, so we have to convert
    #  from Hz to fft bin number
    bin = np.floor((nfft+1)*mel2hz(melpoints)/samplerate)

    fbank = np.zeros([nfilt,nfft//2])
    for j in range(0,nfilt):
        for i in range(int(bin[j]), int(bin[j+1])):
            fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
        for i in range(int(bin[j+1]), int(bin[j+2])):
            fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
    return fbank
               


 def create_mel_filter(fft_size, n_freq_components = 64, start_freq = 300, end_freq = 8000, samplerate=44100):
    """
    Creates a filter to convolve with the spectrogram to get out mels

    """
    mel_inversion_filter = get_filterbanks(nfilt=n_freq_components, 
                                           nfft=fft_size, samplerate=samplerate, 
                                           lowfreq=start_freq, highfreq=end_freq)
    # Normalize filter
    
    mel_filter = mel_inversion_filter / mel_inversion_filter.sum(axis=1,keepdims=True)

    return mel_filter.T, mel_inversion_filter







 def get_spectrograms(x, window_size, step_size, spec_thresh=6, n_mel_freq_components = 64, start_freq = 0, end_freq=None):  
    fft_size = window_size
    assert x.shape[0] % fft_size == 0
    assert fft_size % step_size == 0
    cut = int((fft_size//step_size)/2)

    windowtype = 'hamming'
    mel_filter, mel_inversion_filter = create_mel_filter(fft_size = fft_size,
                                                            n_freq_components = n_mel_freq_components,
                                                            start_freq = start_freq,
                                                            end_freq = end_freq)

    assert not np.isnan(mel_filter.sum()), 'nans in mel filter, try lowering n_mel_freq_components'
    
    spec_linear,phase = spectrogram(x.astype('float64'), window_size=window_size, fft_size = fft_size, 
                                       step_size = step_size, log = True, thresh = spec_thresh,windowtype=windowtype)

    spec_linear = spec_linear[:-cut]
    phase = spec_linear[:-cut]
    
    spec_mel = spectrogram_to_mel(spec_linear, mel_filter)
    return spec_linear, phase, spec_mel
    

    
 #SETTINGS 
 n_mel_freq_components = 80
 start_freq = 200
 spec_thresh = 6
 step_size = 64
 window_size = 1024

 ######## GET SPECTOGRAM FOR A SINGLE FILE #########

 ### Discard phase
 spec_linear, phase, spec_mel = get_spectrograms(data, window_size, step_size, spec_thresh=spec_thresh, n_mel_freq_components = n_mel_freq_components, start_freq = start_freq, end_freq=None)
    
    
 ####### ###### 
    
 recovered_audio_orig = griffin_lim(spec_linear, fft_size = fft_size,
                                            step_size = step_size, log = True, n_iter = 50, pre_raise=1.1,windowtype=windowtype)

	# Packages we're using
	import numpy as np
	import matplotlib.pyplot as plt
	import copy
	from scipy.io import wavfile
	import soundfile as sf

	from utils.snr import residual_snrdb


	def overlap(X, window_size, window_step):
	"""
	Create an overlapped version of X
	Parameters
	----------
	X : ndarray, shape=(n_samples,)
	Input signal to window and overlap
	window_size : int
	Size of windows to take
	window_step : int
	Step size between windows
	Returns
	-------
	X_strided : shape=(n_windows, window_size)
	2D array of overlapped X
	"""
	if window_size % 2 != 0:
	raise ValueError("Window size must be even!")
	# Make sure there are an even number of windows before stridetricks
	append = np.zeros((window_size - len(X) % window_size))
	X = np.hstack((X, append))

	ws = window_size
	ss = window_step
	a = X

	valid = int(len(a) - ws)
	nw = int((valid) // ss)
	out = np.ndarray((nw,ws),dtype = a.dtype)

	for i in range(nw):
	# "slide" the window along the samples
	start = int(i * ss)
	stop = int(start + ws)
	out[i] = a[start : stop]

	return out


	def stft(X, window_size=128, fftsize=256, step=65, mean_normalize=True, real=False,
	compute_onesided=True, windowtype='hamming'):
	"""
	Compute STFT for 1D real valued input X
	"""
	if real:
	local_fft = np.fft.rfft
	cut = -1
	else:
	local_fft = np.fft.fft
	cut = None
	if compute_onesided:
	cut = fftsize // 2
	if mean_normalize:
	X -= X.mean()

	X = overlap(X, window_size, step)

	if windowtype == 'hamming':
	win = np.hamming(window_size)
	elif windowtype == 'bartlett':
	win = np.bartlett(window_size)
	elif windowtype == 'blackman':
	win = np.blackman(window_size)
	elif windowtype == 'kaiser':
	win = np.kaiser(window_size)
	#win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
	else:
	raise ValueError()
	X = X * win[None]
	X = local_fft(X,fftsize)[:, :cut]
	return X

	def spectrogram(d,log = True, thresh= 5, window_size = 256, fft_size = 512, step_size = 64, windowtype='hamming'):
	"""
	creates a spectrogram
	log: take the log of the spectrgram
	thresh: threshold minimum power for log spectrogram
	"""
	F = stft(d, window_size= window_size, fftsize=fft_size, step=step_size, real=False,
	compute_onesided=True, windowtype=windowtype)
	specgram = np.abs(F)
	phase = np.angle(F)
	if log == True:
	specgram /= specgram.max() # volume normalize to max 1
	specgram = np.log10(specgram) # take log
	specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
	else:
	specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold

	return specgram, phase

	# Also mostly modified or taken from https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
	def griffin_lim(X_s, log = True, fft_size = 512, step_size = 512/4, n_iter = 10, pre_raise=None, windowtype='hamming'):

	if log == True:
	X_s = np.power(10, X_s)
	if pre_raise:
	X_s = X_s**pre_raise


	X_s = np.concatenate([X_s, X_s[:, ::-1]], axis=1)
	X_t = iterate_invert_spectrogram(X_s, fft_size, step_size, n_iter=n_iter,windowtype=windowtype)
	return X_t

	def iterate_invert_spectrogram(X_s, fftsize, step, n_iter=10, verbose=False, windowtype='hamming'):
	"""
	Under MSR-LA License
	Based on MATLAB implementation from Spectrogram Inversion Toolbox
	References
	----------
	D. Griffin and J. Lim. Signal estimation from modified
	short-time Fourier transform. IEEE Trans. Acoust. Speech
	Signal Process., 32(2):236-243, 1984.
	Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
	Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
	Adelaide, 1994, II.77-80.
	Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
	Estimation from Modified Short-Time Fourier Transform
	Magnitude Spectra. IEEE Transactions on Audio Speech and
	Language Processing, 08/2007.
	"""
	reg = np.max(X_s) / 1E8
	X_best = copy.deepcopy(X_s)
	for i in range(n_iter):
	if verbose:
	print("Runnning iter %i" % i)
	if i == 0:
	X_t = invert_spectrogram(X_best, step, calculate_offset=True,
	set_zero_phase=True,windowtype=windowtype)
	else:
	# Calculate offset was False in the MATLAB version
	# but in mine it massively improves the result
	# Possible bug in my impl?
	X_t = invert_spectrogram(X_best, step, calculate_offset=True,
	set_zero_phase=False,windowtype=windowtype)
	est = stft(X_t, window_size=window_size, fftsize=fftsize, step=step, compute_onesided=False)
	phase = est / np.maximum(reg, np.abs(est))
	X_best = X_s * phase[:len(X_s)]
	X_t = invert_spectrogram(X_best, step, calculate_offset=True,
	set_zero_phase=False)
	return np.real(X_t)

	def invert_spectrogram(X_s, step, calculate_offset=True, set_zero_phase=True, windowtype='hamming'):
	"""
	Under MSR-LA License
	Based on MATLAB implementation from Spectrogram Inversion Toolbox
	References
	----------
	D. Griffin and J. Lim. Signal estimation from modified
	short-time Fourier transform. IEEE Trans. Acoust. Speech
	Signal Process., 32(2):236-243, 1984.
	Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
	Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
	Adelaide, 1994, II.77-80.
	Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
	Estimation from Modified Short-Time Fourier Transform
	Magnitude Spectra. IEEE Transactions on Audio Speech and
	Language Processing, 08/2007.
	"""
	size = int(X_s.shape[1] // 2)
	wave = np.zeros(int(X_s.shape[0] * step + size))
	# Getting overflow warnings with 32 bit...
	wave = wave.astype('float64')
	total_windowing_sum = np.zeros(int(X_s.shape[0] * step + size))
	if windowtype == 'hamming':
	win = np.hamming(size)
	elif windowtype == 'bartlett':
	win = np.bartlett(size)
	elif windowtype == 'blackman':
	win = np.blackman(size)
	elif windowtype == 'kaiser':
	win = np.kaiser(size)
	#win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
	else:
	raise ValueError()

	est_start = int(size // 2) - 1
	est_end = est_start + size
	for i in range(X_s.shape[0]):
	wave_start = int(step * i)
	wave_end = int(wave_start + size)
	if set_zero_phase:
	spectral_slice = X_s[i].real + 0j
	else:
	# already complex
	spectral_slice = X_s[i]

	# Don't need fftshift due to different impl.
	wave_est = np.real(np.fft.ifft(spectral_slice))[::-1]
	if calculate_offset and i > 0:
	offset_size = int(size - step)
	if offset_size <= 0:
	print("WARNING: Large step size >50\% detected! "
	"This code works best with high overlap - try "
	"with 75% or greater")
	offset_size = step
	offset = xcorr_offset(wave[wave_start:wave_start + offset_size],
	wave_est[est_start:est_start + offset_size])
	else:
	offset = 0
	wave[wave_start:wave_end] += win * wave_est[
	est_start - offset:est_end - offset]
	total_windowing_sum[wave_start:wave_end] += win
	wave = np.real(wave) / (total_windowing_sum + 1E-6)
	return wave

	def xcorr_offset(x1, x2):
	"""
	Under MSR-LA License
	Based on MATLAB implementation from Spectrogram Inversion Toolbox
	References
	----------
	D. Griffin and J. Lim. Signal estimation from modified
	short-time Fourier transform. IEEE Trans. Acoust. Speech
	Signal Process., 32(2):236-243, 1984.
	Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
	Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
	Adelaide, 1994, II.77-80.
	Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
	Estimation from Modified Short-Time Fourier Transform
	Magnitude Spectra. IEEE Transactions on Audio Speech and
	Language Processing, 08/2007.
	"""
	x1 = x1 - x1.mean()
	x2 = x2 - x2.mean()
	frame_size = len(x2)
	half = frame_size // 2
	corrs = np.convolve(x1.astype('float32'), x2[::-1].astype('float32'))
	corrs[:half] = -1E30
	corrs[-half:] = -1E30
	offset = corrs.argmax() - len(x1)
	return offset

	def spectrogram_to_mel(spectrogram, mel_filter):
	mel_spec = spectrogram.dot(mel_filter)
	return mel_spec

	def mel_to_spectrogram(mel_spec, mel_inversion_filter, spec_thresh):
	"""
	takes in an mel spectrogram and returns a normal spectrogram for inversion
	"""
	mel_spec = mel_spec+spec_thresh
	uncompressed_spec = mel_spec.dot(mel_inversion_filter)
	return uncompressed_spec-spec_thresh
	# From https://github.com/jameslyons/python_speech_features

	def hz2mel(hz):
	"""Convert a value in Hertz to Mels
	:param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
	:returns: a value in Mels. If an array was passed in, an identical sized array is returned.
	"""
	return 2595 * np.log10(1+hz/700.)

	def mel2hz(mel):
	"""Convert a value in Mels to Hertz
	:param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
	:returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
	"""
	return 700(10*(mel/2595.0)-1)

	def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
	"""Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
	to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
	:param nfilt: the number of filters in the filterbank, default 20.
	:param nfft: the FFT size. Default is 512.
	:param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
	:param lowfreq: lowest band edge of mel filters, default 0 Hz
	:param highfreq: highest band edge of mel filters, default samplerate/2
	:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
	"""
	highfreq= highfreq or samplerate/2
	assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"

	# compute points evenly spaced in mels
	lowmel = hz2mel(lowfreq)
	highmel = hz2mel(highfreq)
	melpoints = np.linspace(lowmel,highmel,nfilt+2)
	# our points are in Hz, but we use fft bins, so we have to convert
	# from Hz to fft bin number
	bin = np.floor((nfft+1)*mel2hz(melpoints)/samplerate)

	fbank = np.zeros([nfilt,nfft//2])
	for j in range(0,nfilt):
	for i in range(int(bin[j]), int(bin[j+1])):
	fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
	for i in range(int(bin[j+1]), int(bin[j+2])):
	fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
	return fbank



	def create_mel_filter(fft_size, n_freq_components = 64, start_freq = 300, end_freq = 8000, samplerate=44100):
	"""
	Creates a filter to convolve with the spectrogram to get out mels

	"""
	mel_inversion_filter = get_filterbanks(nfilt=n_freq_components,
	nfft=fft_size, samplerate=samplerate,
	lowfreq=start_freq, highfreq=end_freq)
	# Normalize filter

	mel_filter = mel_inversion_filter / mel_inversion_filter.sum(axis=1,keepdims=True)

	return mel_filter.T, mel_inversion_filter







	def get_spectrograms(x, window_size, step_size, spec_thresh=6, n_mel_freq_components = 64, start_freq = 0, end_freq=None):
	fft_size = window_size
	assert x.shape[0] % fft_size == 0
	assert fft_size % step_size == 0
	cut = int((fft_size//step_size)/2)

	windowtype = 'hamming'
	mel_filter, mel_inversion_filter = create_mel_filter(fft_size = fft_size,
	n_freq_components = n_mel_freq_components,
	start_freq = start_freq,
	end_freq = end_freq)

	assert not np.isnan(mel_filter.sum()), 'nans in mel filter, try lowering n_mel_freq_components'

	spec_linear,phase = spectrogram(x.astype('float64'), window_size=window_size, fft_size = fft_size,
	step_size = step_size, log = True, thresh = spec_thresh,windowtype=windowtype)

	spec_linear = spec_linear[:-cut]
	phase = spec_linear[:-cut]

	spec_mel = spectrogram_to_mel(spec_linear, mel_filter)
	return spec_linear, phase, spec_mel



	#SETTINGS
	n_mel_freq_components = 80
	start_freq = 200
	spec_thresh = 6
	step_size = 64
	window_size = 1024

	######## GET SPECTOGRAM FOR A SINGLE FILE #########

	### Discard phase
	spec_linear, phase, spec_mel = get_spectrograms(data, window_size, step_size, spec_thresh=spec_thresh, n_mel_freq_components = n_mel_freq_components, start_freq = start_freq, end_freq=None)


	####### ######

	recovered_audio_orig = griffin_lim(spec_linear, fft_size = fft_size,
	step_size = step_size, log = True, n_iter = 50, pre_raise=1.1,windowtype=windowtype)