import os
import sys
import imageio
import numpy as np
import librosa as lr
import matplotlib.pyplot as plt
from tqdm import tqdm
def plot_wav(y, sr, start=0, stop=None, bit_depth=5, upsample=2, downsample=2):
    """Plot a window of a waveform as a smooth curve plus a sample-and-hold staircase.

    The window ``y[start:stop]`` is drawn twice on the same axes: once
    resampled to ``sr * upsample`` (thin line) and once resampled to
    ``sr / downsample`` and rendered as a staircase. A horizontal scale bar
    labelled with its duration is added, and for windows shorter than 100
    samples a dashed amplitude/time grid is drawn as well.

    Parameters
    ----------
    y : array-like
        Audio samples; indexed with ``y[start:stop]`` and passed to
        ``librosa.resample``.
    sr : number
        Sampling rate of ``y``.
    start : int
        First sample index of the window.
    stop : int or None
        One past the last sample index of the window. ``None`` means the end
        of ``y``.
    bit_depth : int
        Controls the horizontal grid spacing: ``1 / 2**(bit_depth - 1)``
        between -1 and 1.
    upsample : int
        Factor applied to ``sr`` for the smooth curve.
    downsample : int
        Factor ``sr`` is divided by for the staircase; also the stride of the
        vertical grid lines.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure. The caller is responsible for closing it.
    """
    if stop is None:
        # The scale bar and the grid need a numeric window length, so resolve
        # the open-ended default up front instead of failing on `stop - start`.
        stop = len(y)
    window_len = stop - start

    fig, ax = plt.subplots(1, 1, figsize=[13, 5])
    t = lr.samples_to_time(np.arange(len(y)), sr=sr)
    t, y = t[start:stop], y[start:stop]

    # show unquantized waveform
    new_sr = sr * upsample
    # Keyword arguments: recent librosa releases make the rates keyword-only.
    y_cont = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_cont = lr.samples_to_time(np.arange(len(y_cont)), sr=new_sr) + np.min(t)
    ax.plot(t_cont, y_cont, '-', color='k', linewidth=0.7)

    # plot quantized waveform
    new_sr = sr / downsample
    y_ = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_ = t[::downsample]
    # Duplicate every point and offset the two arrays by one element so each
    # value is held until the next sample time (staircase shape).
    t_ = np.repeat(t_, 2)[1:]
    y_ = np.repeat(y_, 2)[:-1]
    ax.plot(t_, y_, '-', color='k')

    # add "scale bar"
    bar_center = np.mean(ax.get_xlim())
    # Largest power of ten (in seconds) that does not exceed the window duration.
    bar_len = 10**np.floor(np.log10(lr.samples_to_time(window_len, sr=sr)))
    bar_start = bar_center - bar_len/2
    bar_stop = bar_center + bar_len/2
    ax.plot([bar_start, bar_stop], [-0.8, -0.8], 'k', linewidth=1.5)
    ax.text(bar_center, -0.7, format_small_time_interval(bar_len), size=14, horizontalalignment='center')

    # add grid
    if window_len < 100:
        # Gray level grows with the window length, so the grid gets lighter
        # as the window approaches 100 samples.
        grid_color = [window_len / 100] * 3
        grid_amplitude_step = 1/(2**(bit_depth-1))
        grid_amplitudes = np.arange(-1, 1+grid_amplitude_step, grid_amplitude_step)
        # Draw on `ax` explicitly rather than on pyplot's implicit current axes.
        ax.hlines(grid_amplitudes, t[0], t[-1], color=grid_color, linewidth=0.5, linestyle='--')
        grid_samples = np.arange(start, stop, downsample)
        grid_times = lr.samples_to_time(grid_samples, sr=sr)
        ax.vlines(grid_times, -1, 1, color=grid_color, linewidth=0.5, linestyle='--')

    # fix the vertical range so every frame shares the same amplitude scale
    ax.set_ylim([-1.05, 1.05])
    return fig
def format_small_time_interval(t):
    """Format a duration given in seconds using s, ms or μs.

    The value is printed with three significant digits. Durations of at
    least one second use seconds, durations of at least one millisecond use
    milliseconds, and everything else is expressed in microseconds.
    """
    # (lower bound in seconds, multiplier into the unit, unit label),
    # checked from the largest unit down.
    units = (
        (1.0, 1, 's'),
        (1e-3, 1e3, 'ms'),
    )
    for lower_bound, multiplier, label in units:
        if t >= lower_bound:
            return f'{t * multiplier:.3g} {label}'
    # Fallback for anything that did not reach the millisecond threshold.
    return f'{t * 1e6:.3g} μs'
def half_cosine_window(n, min_=0.0, max_=1.0):
    """Return ``n`` samples of the falling half of a cosine, scaled to a range.

    Sample ``k`` is ``cos(pi * k / n)`` mapped from [-1, 1] onto
    [``min_``, ``max_``]. The first sample equals ``max_``; the phase stops
    one step short of pi, so the last sample stays just above ``min_``.

    Parameters
    ----------
    n : int
        Number of samples. ``n == 0`` raises ``ZeroDivisionError``.
    min_, max_ : float
        Lower and upper bound of the output range.

    Returns
    -------
    numpy.ndarray
        Array of length ``n``.
    """
    # Build the phase from integer indices instead of a float-step
    # `np.arange(0., pi, pi / n)`: with a non-integer step the length is
    # computed as ceil((stop - start) / step) and can come out as n + 1
    # through rounding. The sample values are unchanged (k * pi / n).
    phase = np.arange(n) * (np.pi / n)
    win = np.cos(phase) * 0.5 + 0.5
    win = (max_ - min_) * win + min_
    return win
if __name__ == '__main__':
    # Renders a zoom-in / zoom-out animation of a waveform: one PNG per zoom
    # level is written to the output directory, then all frames are assembled
    # into `wave.gif` in the same directory.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} AUDIO_PATH OUT_DIR')
    path = sys.argv[1]  # path to audio file, only tried 44100Hz
    out_dir = sys.argv[2]  # must exist, will be filled with PNG frames

    # parameters
    max_amplitude = 0.65
    min_scale = 0.001 * 44100  # half-width of the narrowest window, in samples
    max_scale = 1 * 44100  # half-width of the widest window, in samples
    center = 44100 * 2  # center of segment in samples
    zoom_frames = 100
    frame_rate = 12
    pause_frames = 12

    # load audio and normalize its peak to max_amplitude
    y, sr = lr.load(path, sr=None)
    y = max_amplitude * y / np.max(np.abs(y))

    # draw and save figures; the half-width is interpolated in log10 space,
    # going from max_scale down towards min_scale
    filenames = []
    scales = half_cosine_window(zoom_frames, min_=np.log10(min_scale), max_=np.log10(max_scale))
    for i, scale in enumerate(tqdm(scales)):
        start = int(center - 10**scale)
        stop = int(center + 10**scale)
        fig = plot_wav(y, sr, start=start, stop=stop, upsample=2, downsample=2)
        filename = os.path.join(out_dir, f'wave_{i + 1}.png')
        fig.savefig(filename, pad_inches=0.0, bbox_inches='tight')
        # Release the figure; otherwise pyplot keeps every frame's figure alive.
        plt.close(fig)
        # Remember the frame so it can be assembled into the GIF below.
        filenames.append(filename)

    # add pauses, make symmetric
    filenames = [filenames[0]] * (pause_frames // 2) + filenames + [filenames[-1]] * (pause_frames // 2)
    filenames = filenames + filenames[::-1]

    # write gif
    with imageio.get_writer(os.path.join(out_dir, 'wave.gif'), mode='I', duration=1./frame_rate) as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            # Without this the frames are read but the GIF stays empty.
            writer.append_data(image)
