Created
April 4, 2023 04:49
-
-
Save lynzrand/22d1c191fe7b92035cdb254c2359958e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
black==23.1.0
click==8.1.3
contourpy==1.0.7
cycler==0.11.0
fonttools==4.39.2
kiwisolver==1.4.4
matplotlib==3.7.1
mypy-extensions==1.0.0
numpy==1.24.2
packaging==23.0
pathspec==0.11.1
Pillow==9.4.0
pip==23.0.1
platformdirs==3.1.1
pydub==0.25.1
pyparsing==3.0.9
python-dateutil==2.8.2
scipy==1.10.1
setuptools==65.5.0
six==1.16.0
tomli==2.0.1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.signal as signal | |
import scipy.fftpack as fftpack | |
import scipy.io.wavfile as wavfile | |
from PIL import Image | |
def decode_magnitude(pixels) -> np.ndarray:
    """Map 8-bit grayscale spectrogram pixels back to linear FFT magnitudes.

    Pixel 0 is decoded as -100 dB and pixel 255 as 0 dB.

    Args:
        pixels: 2-D array-like (or PIL "L" image) of values in [0, 255].

    Returns:
        Float array of linear magnitudes with the same shape as ``pixels``.
    """
    level_db = np.asarray(pixels).astype(float) / 255 * 100 - 100
    magnitude = 10 ** (level_db / 20)
    # NOTE: the smallest decodable magnitude is 10 ** (-100 / 20) == 1e-5, so
    # this floor never triggers for 8-bit input; it is kept as a safety net.
    magnitude[magnitude < 1e-10] = 0
    return magnitude


def decode_phase(hsv_pixels) -> np.ndarray:
    """Recover phase angles from the hue channel of an HSV image.

    Hue 0 is decoded as -pi and hue 255 as +pi.

    Args:
        hsv_pixels: array-like (or PIL "HSV" image) indexed as
            ``[row, column, channel]`` with hue in channel 0.

    Returns:
        Float array of phases in radians.
    """
    hue = np.asarray(hsv_pixels)[:, :, 0].astype(float)
    return hue / 255 * 2 * np.pi - np.pi


def overlap_add(magnitude: np.ndarray, phase: np.ndarray) -> np.ndarray:
    """Inverse-FFT every frame and overlap-add the results with 50% overlap.

    Args:
        magnitude: array with one row of FFT magnitudes per frame.
        phase: array of FFT phases (radians), same shape as ``magnitude``.

    Returns:
        1-D float array of ``(num_frames - 1) * hop_size + window_size``
        samples, or an empty array when there are no frames.
    """
    num_frames = len(magnitude)
    if num_frames == 0:
        # Nothing to reconstruct; avoids indexing the first row of an empty array.
        return np.zeros(0)

    window_size = len(magnitude[0])
    hop_size = int(window_size * 0.5)

    # One batched inverse FFT along the last axis replaces the per-frame call.
    frames = fftpack.ifft(magnitude * np.exp(1j * phase), axis=-1).real

    audio = np.zeros((num_frames - 1) * hop_size + window_size)
    for index, frame in enumerate(frames):
        start = index * hop_size
        audio[start:start + window_size] += frame
    return audio


def to_int16(audio) -> np.ndarray:
    """Scale a float signal with nominal range [-1, 1] to 16-bit PCM.

    Samples that fall outside the int16 range after scaling are clamped, so
    overlap-added peaks saturate instead of wrapping around.
    """
    scaled = np.asarray(audio, dtype=float) * ((2**15) - 1)
    return np.clip(scaled, -(2**15), (2**15) - 1).astype(np.int16)


def reconstruct_audio() -> None:
    """Rebuild ``output.wav`` from ``spectrogram.png`` and ``phase.png``."""
    # Load spectrogram and phase images
    spectrogram_img = Image.open("spectrogram.png").convert("L")
    phase_img = Image.open("phase.png").convert("HSV")

    magnitude = decode_magnitude(spectrogram_img)
    phase = decode_phase(phase_img)
    audio = overlap_add(magnitude, phase)

    # Save signal to a WAV file
    samplerate = 44100
    wavfile.write("output.wav", samplerate, to_int16(audio))


if __name__ == "__main__":
    reconstruct_audio()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.signal as signal | |
import scipy.io.wavfile as wavfile | |
import scipy.fftpack as fftpack | |
from PIL import Image | |
# Determine window size and overlap
WINDOW_SIZE = 1024
OVERLAP = 0.5


def to_mono_float(data) -> np.ndarray:
    """Return the samples as a 1-D float array divided by 2**15.

    Multi-channel input keeps only the first channel.
    NOTE(review): the 2**15 divisor assumes 16-bit PCM input -- confirm for
    other sample formats.
    """
    samples = np.asarray(data)
    if samples.ndim > 1:
        # Multi-channel audio: keep channel 0 so framing works on 1-D data.
        samples = samples[:, 0]
    return samples.astype(float) / 2**15


def stft(data, window_size: int = WINDOW_SIZE, overlap: float = OVERLAP):
    """Compute a Blackman-windowed FFT for every hop of ``data``.

    Args:
        data: 1-D array of samples.
        window_size: number of samples (and FFT bins) per frame.
        overlap: fraction of each frame shared with the next one.

    Returns:
        ``(magnitude, phase)``: two float arrays of shape
        ``(num_windows, window_size)``, where
        ``num_windows == ceil(len(data) / hop_size)``.

    Raises:
        ValueError: if ``overlap`` leaves a hop size smaller than one sample.
    """
    hop_size = int(window_size * (1 - overlap))
    if hop_size < 1:
        raise ValueError("overlap leaves a hop size smaller than one sample")

    data = np.asarray(data, dtype=float)
    num_windows = int(np.ceil(len(data) / hop_size))

    # Frames start zero-filled, so a short trailing frame is zero-padded.
    frames = np.zeros((num_windows, window_size))
    for index in range(num_windows):
        start = index * hop_size
        chunk = data[start:start + window_size]
        frames[index, :len(chunk)] = chunk

    # scipy.signal.windows is the supported home of the window functions.
    window = signal.windows.blackman(window_size)
    spectrum = fftpack.fft(frames * window, n=window_size, axis=-1)
    return np.abs(spectrum), np.angle(spectrum)


def magnitude_to_pixels(magnitude) -> np.ndarray:
    """Encode linear magnitudes as 8-bit pixels on a dB scale.

    -100 dB maps to 0 and 0 dB to 255; values outside that range saturate.
    """
    # The small offset keeps log10 finite for zero-valued bins.
    level_db = 20 * np.log10(np.asarray(magnitude, dtype=float) + 1e-10)
    pixels = (level_db + 100) / 100 * 255
    return np.clip(pixels, 0, 255).astype(np.uint8)


def phase_to_hue(phase) -> np.ndarray:
    """Encode phases in [-pi, pi] as 8-bit hue values in [0, 255]."""
    hue = (np.asarray(phase, dtype=float) + np.pi) / (2 * np.pi) * 255
    return hue.astype(np.uint8)


def analyze_audio() -> None:
    """Write ``spectrogram.png`` and ``phase.png`` for ``input.wav``."""
    # Load audio file
    _samplerate, data = wavfile.read("input.wav")
    magnitude, phase = stft(to_mono_float(data))

    spectrogram_image = Image.fromarray(magnitude_to_pixels(magnitude), "L")

    # S=V=100% for all pixels
    phase_hue = phase_to_hue(phase)
    full_scale = np.full(phase_hue.shape, 255, dtype=np.uint8)
    phase_image = Image.fromarray(
        np.dstack((phase_hue, full_scale, full_scale)), "HSV"
    )
    # Convert it into RGB
    phase_image = phase_image.convert("RGB")

    # Save images as PNG files
    spectrogram_image.save("spectrogram.png", "PNG")
    phase_image.save("phase.png", "PNG")


if __name__ == "__main__":
    analyze_audio()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment