Last active
November 4, 2019 18:21
-
-
Save drscotthawley/3c812fde932c649cd3cd41ea8a29803c to your computer and use it in GitHub Desktop.
Alternative method for calculating "instantaneous frequency" for use with spectrograms.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# Test script for Phase 'Unrolling' / Instantaneous frequency | |
# | |
# See Jesse Engel's "rainbowgrams" script, https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494 | |
# | |
# Modifications by Scott H. Hawley, @drscotthawley and Billy Mitchell | |
# These modified versions seem to be both more accurate (~2000x less reconstruction error) | |
# and faster (>20%) | |
# | |
# note, to really see/hear the difference, change dtypes to np.float16! | |
import librosa | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import argparse | |
def rewrap(phase_angle): # opposite of unwrap; handy for testing but I don't actually like to use it | |
return (phase_angle + np.pi) % (2 * np.pi) - np.pi | |
def phase2if(phase_angle, use_unwrap=False, use_where=True): | |
# Calculate 'dphase' Instantaneous Frequency, normalized to [-1,1] | |
if use_unwrap: # Engel et al, GANSynth 2017 paper & code | |
phase_angle = np.unwrap(phase_angle) # unwrap leads to CLOP | |
dphase = phase_angle[:, 1:] - phase_angle[:, :-1] | |
if not use_unwrap: | |
# Hawley's method: don't unroll the phase, that leads to CLOP. All we want is the difference | |
if use_where: | |
dphase = np.where(dphase > np.pi, dphase-2*np.pi, dphase) # keep it bounded | |
dphase = np.where(dphase < -np.pi, dphase+2*np.pi, dphase) # keep it bounded | |
else: | |
dphase = rewrap(dphase) # this works ok but loses a little bit of precision | |
dphase = np.concatenate([phase_angle[:, 0:1], dphase], axis=1) | |
return dphase/np.pi | |
def if2phase(dphase, use_cumsum=False): | |
# undoes phase2if, above | |
dphase = dphase*np.pi # undo normalization by pi | |
if use_cumsum: | |
phase_angle = np.cumsum(dphase, axis=1) # cumsum leads to CLOP | |
else: | |
# Integrate dphase forward but make it wrap at +/- pi as we go, to avoid CLOP | |
phase_angle = dphase | |
for i in range(1, dphase.shape[-1]): | |
phase_angle[:,i] = (phase_angle[:, i-1] + dphase[:, i]) | |
phase_angle[:,i] = np.where(phase_angle[:,i] >= np.pi, phase_angle[:,i] - 2*np.pi, phase_angle[:,i]) | |
phase_angle[:,i] = np.where(phase_angle[:,i] < -np.pi, phase_angle[:,i] + 2*np.pi, phase_angle[:,i]) | |
return phase_angle | |
def audio_test(path=None, ax=1, peak=70.0, use_google=True, n_fft=2048, | |
win_length=512, hop_length=256, debug=False): | |
print("Loading from file",path) | |
# find mag and phase, then phase unroll | |
audio, sr = librosa.load(path) | |
audio = audio.astype(np.float32) | |
n = len(audio) | |
audio_pad = librosa.util.fix_length(audio, n + n_fft // 2) | |
#C = librosa.stft(audio_pad, n_fft=n_fft) | |
C = librosa.stft(audio, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True) | |
mag = np.abs(C) | |
phase_angle = np.angle(C) | |
# convert to 'instantaneous frequency', which we call dphase | |
dphase = phase2if(phase_angle, use_unwrap=use_google) | |
# In case you want to convert magnitude to power in dB... | |
#mag_db = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1 | |
# ...in other code, here's where we'd do the autoencoder on mag and dphase | |
# Undo what we did above: reconstruct phase from dphase | |
phase_angle_hat = if2phase(dphase, use_cumsum=use_google) | |
phase_hat = np.exp(1j*phase_angle_hat) # convert from just angle to complex number | |
# combine new mag and phase to complex spectrogram C_hat, and generate output audio via ISTFT | |
mag_hat = mag | |
C_hat = mag_hat * phase_hat # combine into new complex array | |
audio_out = librosa.istft(C_hat, length=n, win_length=win_length, hop_length=hop_length, center=True) | |
if debug: | |
diff = audio - audio_out | |
print("absmax audio diff = ",np.max(np.abs(diff))) | |
plt.title("input audio - output audio") | |
plt.plot(diff) | |
plt.show() | |
librosa.output.write_wav('test_out.wav',audio_out,sr) | |
return audio_out | |
if __name__ == '__main__': | |
parser= argparse.ArgumentParser(description='none', formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
parser.add_argument('--path', help='location of file', default='./Leadfoot_ScottHawley_clip.wav') | |
args=parser.parse_args() | |
for use_google in [True, False]: # Engel et al's version vs. my version | |
audio_test(path=args.path, use_google=use_google, debug=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just noting here for future reference:
"One-step robust deep learning phase unwrapping " by Wang et al, April 2019:
https://www.osapublishing.org/DirectPDFAccess/2C694D16-BD6C-C4A2-226D6623E18ACAF2_412322/oe-27-10-15100.pdf?da=1&id=412322&seq=0&mobile=no