casperkaae · April 2, 2017 22:32
diff --git a/soundfun.py b/soundfun.py

 # Packages we're using
 import numpy as np
 import copy
 import soundfile as sf

 def overlap(X, window_size, window_step):
    """
    Create an overlapped version of X
    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap
    window_size : int
        Size of windows to take
    window_step : int
        Step size between windows
    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X
    """
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    # Make sure there are an even number of windows before stridetricks
    append = np.zeros((window_size - len(X) % window_size))
    X = np.hstack((X, append))

    ws = window_size
    ss = window_step
    a = X

    valid = int(len(a) - ws)
    nw = int((valid) // ss)
    out = np.ndarray((nw,ws),dtype = a.dtype)

    for i in range(nw):
        # "slide" the window along the samples
        start = int(i * ss)
        stop = int(start + ws)
        out[i] = a[start : stop]

    return out


 def stft(X, window_size=128, fftsize=256, step=65, mean_normalize=True, real=False,
         compute_onesided=True, windowtype='hamming'):
    """
    Compute STFT for 1D real valued input X
    """
    if real:
        local_fft = np.fft.rfft
        cut = -1
    else:
        local_fft = np.fft.fft
        cut = None
    if compute_onesided:
        cut = fftsize // 2
    if mean_normalize:
        X -= X.mean()

    X = overlap(X, window_size, step)
    
    if windowtype == 'hamming':
        win = np.hamming(window_size)
    elif windowtype == 'bartlett':
        win = np.bartlett(window_size)
    elif windowtype == 'blackman':
        win = np.blackman(window_size)
    elif windowtype == 'kaiser':
        win = np.kaiser(window_size)
        #win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
    else:
        raise ValueError()
    X = X * win[None]
    X = local_fft(X,fftsize)[:, :cut]
    return X

 def spectrogram(d,log = True, thresh= 5, window_size = 256, fft_size = 512, step_size = 64, windowtype='hamming'):
    """
    creates a spectrogram
    log: take the log of the spectrgram
    thresh: threshold minimum power for log spectrogram
    """
    F = stft(d, window_size= window_size, fftsize=fft_size, step=step_size, real=False,
        compute_onesided=True, windowtype=windowtype)
    specgram = np.abs(F)
    phase = np.angle(F) 
    if log == True:
        specgram /= specgram.max() # volume normalize to max 1
        specgram = np.log10(specgram) # take log
        specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
    else:
        specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold
    
    return specgram, phase


 def spectrogram_to_mel(spectrogram, mel_filter):
    mel_spec = spectrogram.dot(mel_filter)
    return mel_spec

 def hz2mel(hz):
    """Convert a value in Hertz to Mels
    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
    """
    return 2595 * np.log10(1+hz/700.)
    
 def mel2hz(mel):
    """Convert a value in Mels to Hertz
    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    """
    return 700*(10**(mel/2595.0)-1)

 def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
    """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
    to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    """
    highfreq= highfreq or samplerate/2
    assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
    
    # compute points evenly spaced in mels
    lowmel = hz2mel(lowfreq)
    highmel = hz2mel(highfreq)
    melpoints = np.linspace(lowmel,highmel,nfilt+2)
    # our points are in Hz, but we use fft bins, so we have to convert
    #  from Hz to fft bin number
    bin = np.floor((nfft+1)*mel2hz(melpoints)/samplerate)

    fbank = np.zeros([nfilt,nfft//2])
    for j in range(0,nfilt):
        for i in range(int(bin[j]), int(bin[j+1])):
            fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
        for i in range(int(bin[j+1]), int(bin[j+2])):
            fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
    return fbank
               


 def create_mel_filter(fft_size, n_freq_components = 64, start_freq = 300, end_freq = 8000, samplerate=44100):
    """
    Creates a filter to convolve with the spectrogram to get out mels

    """
    mel_inversion_filter = get_filterbanks(nfilt=n_freq_components, 
                                           nfft=fft_size, samplerate=samplerate, 
                                           lowfreq=start_freq, highfreq=end_freq)
    # Normalize filter
    
    mel_filter = mel_inversion_filter / mel_inversion_filter.sum(axis=1,keepdims=True)

    return mel_filter.T, mel_inversion_filter






 def get_spectrograms(x, window_size, step_size, spec_thresh=6, n_mel_freq_components = 64, start_freq = 0, end_freq=None, samplerate=22050):  
    fft_size = window_size

    n = x.shape[0]
    x = x[:n-n%fft_size] 
    
    assert x.shape[0] % fft_size == 0
    assert fft_size % step_size == 0
    
    cut = int((fft_size//step_size)/2)
    
    windowtype = 'hamming'
    mel_filter, mel_inversion_filter = create_mel_filter(fft_size = fft_size,
                                                            n_freq_components = n_mel_freq_components,
                                                            start_freq = start_freq,
                                                            end_freq = end_freq,
                                                            samplerate = samplerate)

    assert not np.isnan(mel_filter.sum()), 'nans in mel filter, try lowering n_mel_freq_components'
    
    spec_linear,phase = spectrogram(x.astype('float64'), window_size=window_size, fft_size = fft_size, 
                                       step_size = step_size, log = True, thresh = spec_thresh,windowtype=windowtype)

    spec_linear = spec_linear[:-cut]
    phase = spec_linear[:-cut]
    
    spec_mel = spectrogram_to_mel(spec_linear, mel_filter)
    return x, spec_linear, phase, spec_mel
    

    
 #SETTINGS 
 n_mel_freq_components = 80
 start_freq = 200
 spec_thresh = 6
 step_size = 64
 window_size = 1024
 samplerate = 22050

 ######## GET SPECTOGRAM FOR A SINGLE FILE #########

 ### Discard phase
 xshortened, spec_linear, phase, spec_mel = get_spectrograms(data, window_size, step_size, spec_thresh=spec_thresh, n_mel_freq_components = n_mel_freq_components, start_freq = start_freq, end_freq=None, samplerate=samplerate)
  
 ####### ###### 
    
 recovered_audio_orig = griffin_lim(spec_linear, fft_size = fft_size,
                                            step_size = step_size, log = True, n_iter = 50, pre_raise=1.1,windowtype=windowtype)

	# Packages we're using
	import numpy as np
	import copy
	import soundfile as sf

	def overlap(X, window_size, window_step):
	"""
	Create an overlapped version of X
	Parameters
	----------
	X : ndarray, shape=(n_samples,)
	Input signal to window and overlap
	window_size : int
	Size of windows to take
	window_step : int
	Step size between windows
	Returns
	-------
	X_strided : shape=(n_windows, window_size)
	2D array of overlapped X
	"""
	if window_size % 2 != 0:
	raise ValueError("Window size must be even!")
	# Make sure there are an even number of windows before stridetricks
	append = np.zeros((window_size - len(X) % window_size))
	X = np.hstack((X, append))

	ws = window_size
	ss = window_step
	a = X

	valid = int(len(a) - ws)
	nw = int((valid) // ss)
	out = np.ndarray((nw,ws),dtype = a.dtype)

	for i in range(nw):
	# "slide" the window along the samples
	start = int(i * ss)
	stop = int(start + ws)
	out[i] = a[start : stop]

	return out


	def stft(X, window_size=128, fftsize=256, step=65, mean_normalize=True, real=False,
	compute_onesided=True, windowtype='hamming'):
	"""
	Compute STFT for 1D real valued input X
	"""
	if real:
	local_fft = np.fft.rfft
	cut = -1
	else:
	local_fft = np.fft.fft
	cut = None
	if compute_onesided:
	cut = fftsize // 2
	if mean_normalize:
	X -= X.mean()

	X = overlap(X, window_size, step)

	if windowtype == 'hamming':
	win = np.hamming(window_size)
	elif windowtype == 'bartlett':
	win = np.bartlett(window_size)
	elif windowtype == 'blackman':
	win = np.blackman(window_size)
	elif windowtype == 'kaiser':
	win = np.kaiser(window_size)
	#win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(window_size) / (window_size - 1))
	else:
	raise ValueError()
	X = X * win[None]
	X = local_fft(X,fftsize)[:, :cut]
	return X

	def spectrogram(d,log = True, thresh= 5, window_size = 256, fft_size = 512, step_size = 64, windowtype='hamming'):
	"""
	creates a spectrogram
	log: take the log of the spectrgram
	thresh: threshold minimum power for log spectrogram
	"""
	F = stft(d, window_size= window_size, fftsize=fft_size, step=step_size, real=False,
	compute_onesided=True, windowtype=windowtype)
	specgram = np.abs(F)
	phase = np.angle(F)
	if log == True:
	specgram /= specgram.max() # volume normalize to max 1
	specgram = np.log10(specgram) # take log
	specgram[specgram < -thresh] = -thresh # set anything less than the threshold as the threshold
	else:
	specgram[specgram < thresh] = thresh # set anything less than the threshold as the threshold

	return specgram, phase


	def spectrogram_to_mel(spectrogram, mel_filter):
	mel_spec = spectrogram.dot(mel_filter)
	return mel_spec

	def hz2mel(hz):
	"""Convert a value in Hertz to Mels
	:param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
	:returns: a value in Mels. If an array was passed in, an identical sized array is returned.
	"""
	return 2595 * np.log10(1+hz/700.)

	def mel2hz(mel):
	"""Convert a value in Mels to Hertz
	:param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
	:returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
	"""
	return 700(10*(mel/2595.0)-1)

	def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
	"""Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
	to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
	:param nfilt: the number of filters in the filterbank, default 20.
	:param nfft: the FFT size. Default is 512.
	:param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
	:param lowfreq: lowest band edge of mel filters, default 0 Hz
	:param highfreq: highest band edge of mel filters, default samplerate/2
	:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
	"""
	highfreq= highfreq or samplerate/2
	assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"

	# compute points evenly spaced in mels
	lowmel = hz2mel(lowfreq)
	highmel = hz2mel(highfreq)
	melpoints = np.linspace(lowmel,highmel,nfilt+2)
	# our points are in Hz, but we use fft bins, so we have to convert
	# from Hz to fft bin number
	bin = np.floor((nfft+1)*mel2hz(melpoints)/samplerate)

	fbank = np.zeros([nfilt,nfft//2])
	for j in range(0,nfilt):
	for i in range(int(bin[j]), int(bin[j+1])):
	fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
	for i in range(int(bin[j+1]), int(bin[j+2])):
	fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
	return fbank



	def create_mel_filter(fft_size, n_freq_components = 64, start_freq = 300, end_freq = 8000, samplerate=44100):
	"""
	Creates a filter to convolve with the spectrogram to get out mels

	"""
	mel_inversion_filter = get_filterbanks(nfilt=n_freq_components,
	nfft=fft_size, samplerate=samplerate,
	lowfreq=start_freq, highfreq=end_freq)
	# Normalize filter

	mel_filter = mel_inversion_filter / mel_inversion_filter.sum(axis=1,keepdims=True)

	return mel_filter.T, mel_inversion_filter






	def get_spectrograms(x, window_size, step_size, spec_thresh=6, n_mel_freq_components = 64, start_freq = 0, end_freq=None, samplerate=22050):
	fft_size = window_size

	n = x.shape[0]
	x = x[:n-n%fft_size]

	assert x.shape[0] % fft_size == 0
	assert fft_size % step_size == 0

	cut = int((fft_size//step_size)/2)

	windowtype = 'hamming'
	mel_filter, mel_inversion_filter = create_mel_filter(fft_size = fft_size,
	n_freq_components = n_mel_freq_components,
	start_freq = start_freq,
	end_freq = end_freq,
	samplerate = samplerate)

	assert not np.isnan(mel_filter.sum()), 'nans in mel filter, try lowering n_mel_freq_components'

	spec_linear,phase = spectrogram(x.astype('float64'), window_size=window_size, fft_size = fft_size,
	step_size = step_size, log = True, thresh = spec_thresh,windowtype=windowtype)

	spec_linear = spec_linear[:-cut]
	phase = spec_linear[:-cut]

	spec_mel = spectrogram_to_mel(spec_linear, mel_filter)
	return x, spec_linear, phase, spec_mel



	#SETTINGS
	n_mel_freq_components = 80
	start_freq = 200
	spec_thresh = 6
	step_size = 64
	window_size = 1024
	samplerate = 22050

	######## GET SPECTOGRAM FOR A SINGLE FILE #########

	### Discard phase
	xshortened, spec_linear, phase, spec_mel = get_spectrograms(data, window_size, step_size, spec_thresh=spec_thresh, n_mel_freq_components = n_mel_freq_components, start_freq = start_freq, end_freq=None, samplerate=samplerate)

	####### ######

	recovered_audio_orig = griffin_lim(spec_linear, fft_size = fft_size,
	step_size = step_size, log = True, n_iter = 50, pre_raise=1.1,windowtype=windowtype)