Last active
January 10, 2023 12:54
-
-
Save jvbalen/596f5434c66373dfa578ac3164b13788 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import imageio | |
import numpy as np | |
import librosa as lr | |
import matplotlib.pyplot as plt | |
from tqdm import tqdm | |
def plot_wav(y, sr, start=0, stop=None, bit_depth=5, upsample=2, downsample=2):
    """Plot a segment of a waveform as a smooth curve plus a sample-and-hold staircase.

    The segment ``y[start:stop]`` is drawn twice on one axes: once resampled to
    ``sr * upsample`` (thin line, the "continuous" signal) and once resampled
    to ``sr / downsample`` and held between samples (the staircase). A scale
    bar is added, and for segments shorter than 100 samples a dashed
    sample/amplitude grid is overlaid.

    Args:
        y: 1-D array of audio samples. The y-axis is fixed to [-1.05, 1.05],
            so values are presumably expected within [-1, 1].
        sr: sample rate of ``y`` in Hz.
        start: index of the first sample of the segment.
        stop: index one past the last sample; ``None`` means ``len(y)``.
        bit_depth: sets the spacing of the horizontal grid lines
            (``1 / 2**(bit_depth - 1)``). The staircase amplitudes themselves
            are not rounded to this grid.
        upsample: rate multiplier used for the smooth curve.
        downsample: rate divisor used for the staircase and the vertical grid.

    Returns:
        The matplotlib figure holding the plot (axes are switched off).
    """
    # Resolve the open-ended default up front: `stop - start` is needed below
    # for the scale bar and the grid, and None cannot take part in arithmetic.
    if stop is None:
        stop = len(y)
    n_samples = stop - start

    fig, ax = plt.subplots(1, 1, figsize=[13, 5])
    t = lr.samples_to_time(np.arange(len(y)), sr=sr)
    t, y = t[start:stop], y[start:stop]

    # show unquantized waveform
    # (rates are passed by keyword: newer librosa releases reject positional rates)
    new_sr = sr * upsample
    y_cont = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_cont = lr.samples_to_time(np.arange(len(y_cont)), sr=new_sr) + np.min(t)
    ax.plot(t_cont, y_cont, '-', color='k', linewidth=0.7)

    # plot quantized waveform
    new_sr = sr / downsample
    y_ = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_ = t[::downsample]
    # The resampled length can differ by one from the strided time axis because
    # of rounding; trim both to a common length so they can be plotted together.
    n_steps = min(len(t_), len(y_))
    t_, y_ = t_[:n_steps], y_[:n_steps]
    # Duplicate every point and shift the two arrays against each other by one
    # element: each amplitude is held until the next sample time (staircase).
    t_ = np.repeat(t_, 2)[1:]
    y_ = np.repeat(y_, 2)[:-1]
    ax.plot(t_, y_, '-', color='k')

    # add "scale bar": the largest power of ten (in seconds) that does not
    # exceed the segment duration, centred horizontally
    bar_center = np.mean(ax.get_xlim())
    bar_len = 10**np.floor(np.log10(lr.samples_to_time(n_samples, sr=sr)))
    bar_start = bar_center - bar_len/2
    bar_stop = bar_center + bar_len/2
    ax.plot([bar_start, bar_stop], [-0.8, -0.8], 'k', linewidth=1.5)
    ax.text(bar_center, -0.7, format_small_time_interval(bar_len), size=14, horizontalalignment='center')

    # add grid (only at high zoom; its gray level approaches white as the
    # segment length approaches 100 samples, so it fades out gradually)
    if n_samples < 100:
        grid_color = (n_samples / 100,) * 3
        grid_amplitude_step = 1/(2**(bit_depth-1))
        grid_amplitudes = np.arange(-1, 1+grid_amplitude_step, grid_amplitude_step)
        # draw on `ax` explicitly rather than on whatever pyplot considers current
        ax.hlines(grid_amplitudes, t[0], t[-1], color=grid_color, linewidth=0.5, linestyle='--')
        grid_samples = np.arange(start, stop, downsample)
        grid_times = lr.samples_to_time(grid_samples, sr=sr)
        ax.vlines(grid_times, -1, 1, color=grid_color, linewidth=0.5, linestyle='--')

    # turn off axes
    ax.set_ylim([-1.05, 1.05])
    ax.axis('off')
    return fig
def format_small_time_interval(t):
    """Return a short label for a duration ``t`` given in seconds.

    The value is shown with three significant digits in seconds when it is at
    least one second, in milliseconds when it is at least one millisecond, and
    in microseconds otherwise.
    """
    # (smallest duration the unit is used for, factor from seconds, suffix)
    units = ((1.0, 1, 's'), (1e-3, 1e3, 'ms'))
    for threshold, factor, suffix in units:
        if t >= threshold:
            return f'{t * factor:.3g} {suffix}'
    # everything below a millisecond falls through to microseconds
    return f'{t * 1e6:.3g} μs'
def half_cosine_window(n, min_=0.0, max_=1.0):
    """Return ``n`` samples of the falling half of a cosine, scaled to a range.

    Sample ``i`` equals ``min_ + (max_ - min_) * (0.5 + 0.5 * cos(pi * i / n))``.
    The first sample is therefore ``max_``; the phase ``pi`` itself is excluded,
    so the last sample approaches ``min_`` without reaching it.

    Args:
        n: number of samples. Values <= 0 give an empty array.
        min_: value the window decays towards.
        max_: value of the first sample.

    Returns:
        1-D float array of length ``max(n, 0)``.
    """
    if n <= 0:
        return np.zeros(0)
    # Build the phase from an integer ramp instead of a float-step
    # np.arange(0., pi, pi / n): the latter derives its length from a rounded
    # float division and may emit n + 1 samples. Values are otherwise identical.
    phase = np.arange(n) * (np.pi / n)
    win = np.cos(phase) * 0.5 + 0.5
    win = (max_ - min_) * win + min_
    return win
if __name__ == '__main__':
    # Renders a zoom animation of a waveform: one PNG per zoom level, then a
    # GIF that zooms in, pauses, and plays the same frames back in reverse.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} AUDIO_PATH OUT_DIR')
    path = sys.argv[1]  # path to audio file
    out_dir = sys.argv[2]  # will be spammed with png's
    os.makedirs(out_dir, exist_ok=True)

    # parameters
    max_amplitude = 0.65
    zoom_frames = 100
    frame_rate = 12
    pause_frames = 12

    # load audio at its native sample rate
    y, sr = lr.load(path, sr=None)
    peak = np.max(np.abs(y))
    if peak > 0:  # an all-zero signal cannot be normalised; leave it untouched
        y = max_amplitude * y / peak

    # Segment geometry in samples, derived from the file's actual sample rate
    # so that the durations below hold for files that are not 44100 Hz too.
    min_scale = 0.001 * sr  # half-width of the narrowest view (1 ms)
    max_scale = 1 * sr  # half-width of the widest view (1 s)
    center = sr * 2  # center of segment (2 s into the file)

    # draw and save figures; the half-widths are spaced on a log10 scale and
    # follow a half cosine, starting at max_scale and shrinking towards min_scale
    filenames = []
    scales = half_cosine_window(zoom_frames, min_=np.log10(min_scale), max_=np.log10(max_scale))
    for i, scale in enumerate(tqdm(scales)):
        half_width = 10**scale
        start = int(center - half_width)
        stop = int(center + half_width)
        fig = plot_wav(y, sr, start=start, stop=stop, upsample=2, downsample=2)
        filename = os.path.join(out_dir, f'wave_{i + 1}.png')
        fig.savefig(filename, pad_inches=0.0, bbox_inches='tight')
        filenames.append(filename)
        plt.close(fig)  # release the figure so memory does not grow per frame

    # add pauses, make symmetric
    hold = pause_frames // 2
    filenames = [filenames[0]] * hold + filenames + [filenames[-1]] * hold
    filenames = filenames + filenames[::-1]

    # write gif
    with imageio.get_writer(os.path.join(out_dir, 'wave.gif'), mode='I', duration=1./frame_rate) as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            writer.append_data(image)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment