-
-
Save fakufaku/d395f5aa5e5c5fa07c7cbcf51d543413 to your computer and use it in GitHub Desktop.
| import torch | |
| import paderbox | |
| import numpy as np | |
| from scipy.signal import hamming, blackman, get_window, hann | |
| import matplotlib.pyplot as plt | |
# Demo: run the same zero-padded test tone through torch.stft/istft and
# paderbox.transform.stft/istft, check that each pair reconstructs the input,
# and report the largest difference between the two STFTs.

f = 1500.0  # tone frequency; fs / f = 32 samples per period, exactly periodic
fs = 48000.0  # sampling rate
nfft = 512  # frame / FFT length in samples
hop = 128  # shift between consecutive frames in samples

# One second of a pure sine tone.
demo = np.sin(2 * np.pi * f / fs * np.arange(fs))

# To make the comparison easier, zero-pad the signal by `nfft - hop` samples
# at the end here and at the front below. The padded length is then exactly
# `nfft` plus a whole number of hops, so no samples are left over and every
# sample of the original tone is covered by `nfft / hop` overlapping frames.
pad_size = nfft - hop
padding = np.zeros(pad_size)
demo = np.concatenate([demo, padding])

# torch
# - use `center=False` so that torch does not add its own padding
# - zero-pad the front of the signal manually instead
demo_lr = np.concatenate([padding, demo])
demo_lr = torch.from_numpy(demo_lr)

# The window dtype must match the signal dtype (float64 coming from numpy).
win_pt = torch.hamming_window(nfft, dtype=demo_lr.dtype)

PT = torch.stft(
    demo_lr,
    n_fft=nfft,
    hop_length=hop,
    window=win_pt,
    return_complex=True,
    # NOTE(review): per the torch docs `pad_mode` only applies when
    # `center=True`, so it should have no effect here -- confirm.
    pad_mode="constant",
    center=False,
)
recon_pt = torch.istft(PT, n_fft=nfft, hop_length=hop, window=win_pt, center=False)

# Drop the manual front padding so the result lines up with `demo`.
recon_pt = recon_pt[pad_size:]
recon_pt = recon_pt.numpy()
PT = PT.numpy()

print("torch: reconstruction exact ?", np.allclose(recon_pt, demo))

# paderbox
# - `fading=False` so that paderbox does not add its own padding either
# NOTE(review): `symmetric_window` is left at its default here; presumably
# that yields a periodic window matching `torch.hamming_window` -- confirm.
AR = paderbox.transform.stft(
    demo_lr,
    size=nfft,
    shift=hop,
    fading=False,
    window=hamming,
)
recon_ar = paderbox.transform.istft(
    AR, size=nfft, shift=hop, fading=False, window=hamming
)

# Transpose so that the axis order matches `PT` for the comparison below.
AR = AR.T
recon_ar = recon_ar[pad_size:]

print(
    "pader: reconstruction exact ?",
    np.allclose(recon_ar, demo),
)

# Largest element-wise magnitude difference between the two STFTs.
print("difference between torch's and paderbox's STFT", abs(PT - AR).max())
If I change "hamming" to "blackman" (which is used in GSS), I get the following error:
RuntimeError: istft(CPUComplexDoubleType[257, 378], n_fft=512, hop_length=128, win_length=512, window=torch.DoubleTensor{[512]}, center=0, normalized=0, onesided=None, length=None, return_complex=0) window overlap add min: 0
Perhaps related to the following issue: pytorch/pytorch#62323?
I tried to get around the issue by using the idea from this comment, i.e., win_pt = torch.blackman_window(nfft + 2, dtype=demo_lr.dtype)[1:-1], but then the difference between the STFTs is large (0.42016923270740847).
I also reported the same issue! pytorch/pytorch#91309
Does it have to be Blackman ? You can also use center=True, which has slightly different padding, but should not change quality.
I also reported the same issue! pytorch/pytorch#91309 Does it have to be Blackman ? You can also use center=True, which has slightly different padding, but should not change quality.
I will try out other options and see how they compare in terms of downstream WER.
If I change "hamming" to "blackman" (which is used in GSS), I get the following error:
Perhaps related to the following issue: pytorch/pytorch#62323?