-
-
Save danstowell/06c2dd3c5c07e2f3603a8da817a4d362 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import imageio | |
import numpy as np | |
import librosa as lr | |
import matplotlib.pyplot as plt | |
from tqdm import tqdm | |
def plot_wav(y, sr, start=0, stop=None, bit_depth=5, upsample=2, downsample=2):
    """Plot a segment of a waveform as a smooth curve plus a staircase curve.

    The segment ``y[start:stop]`` is drawn twice on the same axes: once
    resampled to ``sr * upsample`` as a thin line, and once resampled to
    ``sr / downsample`` as a step (sample-and-hold) line.  A scale bar showing
    a power-of-ten time span is added, and for segments shorter than 100
    samples a dashed grid is drawn as well.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.  The fixed y-limits and grid assume values within
        [-1, 1] -- NOTE(review): confirm callers normalise accordingly.
    sr : number
        Sampling rate of ``y`` in Hz.
    start : int
        First sample index of the segment.
    stop : int or None
        One past the last sample index; ``None`` means the end of ``y``.
    bit_depth : int
        Controls the spacing of the horizontal grid lines,
        ``1 / 2**(bit_depth - 1)``.
    upsample : number
        Factor applied to ``sr`` for the smooth curve.
    downsample : int
        Factor by which ``sr`` is divided for the staircase curve; also used
        as the slice step for its time axis and for the vertical grid lines.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the plot (axes are switched off).
    """
    # The scale bar and grid need a concrete sample count, so resolve the
    # "until the end" default up front instead of failing on ``None - start``.
    if stop is None:
        stop = len(y)

    fig, ax = plt.subplots(1, 1, figsize=[13, 5])
    t = lr.samples_to_time(np.arange(len(y)), sr=sr)
    t, y = t[start:stop], y[start:stop]

    # show unquantized waveform
    # (rates are passed by keyword: newer librosa releases reject them
    # positionally, older ones accept the same keyword names)
    new_sr = sr * upsample
    y_cont = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_cont = lr.samples_to_time(np.arange(len(y_cont)), sr=new_sr) + np.min(t)
    ax.plot(t_cont, y_cont, '-', color='k', linewidth=0.7)

    # plot quantized waveform
    new_sr = sr / downsample
    y_ = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_ = t[::downsample]
    # Duplicate every point and shift times by one against values so that the
    # line alternates horizontal and vertical strokes (a staircase).
    t_ = np.repeat(t_, 2)[1:]
    y_ = np.repeat(y_, 2)[:-1]
    ax.plot(t_, y_, '-', color='k')

    # add "scale bar"
    bar_center = np.mean(ax.get_xlim())
    # largest power of ten (in seconds) not exceeding the segment duration
    bar_len = 10**np.floor(np.log10(lr.samples_to_time(stop - start, sr=sr)))
    bar_start = bar_center - bar_len / 2
    bar_stop = bar_center + bar_len / 2
    ax.plot([bar_start, bar_stop], [-0.8, -0.8], 'k', linewidth=1.5)
    ax.text(bar_center, -0.7, format_small_time_interval(bar_len), size=14,
            horizontalalignment='center')

    # add grid
    if stop - start < 100:
        # grey level equals the segment length / 100, so the grid gets
        # darker the shorter the segment is
        grid_color = [(stop - start) / 100] * 3
        grid_amplitude_step = 1 / (2**(bit_depth - 1))
        grid_amplitudes = np.arange(-1, 1 + grid_amplitude_step, grid_amplitude_step)
        ax.hlines(grid_amplitudes, t[0], t[-1], color=grid_color,
                  linewidth=0.5, linestyle='--')
        grid_samples = np.arange(start, stop, downsample)
        grid_times = lr.samples_to_time(grid_samples, sr=sr)
        ax.vlines(grid_times, -1, 1, color=grid_color, linewidth=0.5, linestyle='--')

    # turn off axes
    ax.set_ylim([-1.05, 1.05])
    ax.axis('off')
    return fig
def format_small_time_interval(t):
    """Return ``t`` (in seconds) as a short label in s, ms or μs.

    Values of at least one second are shown in seconds, values of at least
    one millisecond in milliseconds, and everything else in microseconds.
    The number is always rendered with three significant digits at most.
    """
    # Pick the display unit first, then format once.
    if t >= 1.0:
        value, unit = t, 's'
    elif t >= 1e-3:
        value, unit = t * 1e3, 'ms'
    else:
        value, unit = t * 1e6, 'μs'
    return f'{value:.3g} {unit}'
def half_cosine_window(n, min_=0.0, max_=1.0):
    """Return ``n`` samples of a falling half-cosine ramp.

    The ramp is ``cos(k * pi / n) * 0.5 + 0.5`` for ``k = 0 .. n-1``, rescaled
    from the range [0, 1] to [``min_``, ``max_``].  The first sample therefore
    equals ``max_``; the last sample approaches but does not reach ``min_``.

    Parameters
    ----------
    n : int
        Number of samples.  A non-positive ``n`` yields an empty array.
    min_, max_ : float
        Output values corresponding to the bottom and top of the ramp.

    Returns
    -------
    np.ndarray
        One-dimensional float array of length ``n``.
    """
    if n <= 0:
        return np.zeros(0)
    # Build the phase from an integer index ramp: a float-stepped
    # ``np.arange(0, pi, pi / n)`` may return n + 1 samples due to rounding.
    phase = np.arange(n) * (np.pi / n)
    win = np.cos(phase) * 0.5 + 0.5
    win = (max_ - min_) * win + min_
    return win
if __name__ == '__main__':
    # Usage: <script> AUDIO_PATH OUT_DIR
    # Renders a zoom-in / zoom-out animation of the waveform as PNG frames
    # and assembles them into OUT_DIR/wave.gif.
    path = sys.argv[1]  # path to audio file
    out_dir = sys.argv[2]  # will be spammed with png's
    os.makedirs(out_dir, exist_ok=True)  # create it rather than fail on the first savefig

    # parameters
    max_amplitude = 0.65
    zoom_frames = 100
    frame_rate = 12
    pause_frames = 12

    # load audio
    y, sr = lr.load(path, sr=None)
    peak = np.max(np.abs(y))
    if peak > 0:  # leave an all-zero signal untouched instead of dividing by zero
        y = max_amplitude * y / peak

    # Zoom range and position, expressed in samples at the file's own
    # sampling rate (identical to the former constants for 44100 Hz input).
    min_scale = 0.001 * sr  # half-width of the tightest view: 1 ms
    max_scale = 1 * sr  # half-width of the widest view: 1 s
    center = sr * 2  # center of segment: 2 s into the file

    # draw and save figures
    filenames = []
    # interpolate the half-width on a log10 scale, widest view first
    scales = half_cosine_window(zoom_frames, min_=np.log10(min_scale), max_=np.log10(max_scale))
    for i, scale in enumerate(tqdm(scales)):
        start = int(center - 10**scale)
        stop = int(center + 10**scale)
        fig = plot_wav(y, sr, start=start, stop=stop, upsample=2, downsample=2)
        filename = os.path.join(out_dir, f'wave_{i + 1}.png')
        fig.savefig(filename, pad_inches=0.0, bbox_inches='tight')
        filenames.append(filename)
        plt.close(fig)  # release the figure so memory does not grow per frame

    # add pauses, make symmetric
    filenames = [filenames[0]] * (pause_frames // 2) + filenames + [filenames[-1]] * (pause_frames // 2)
    filenames = filenames + filenames[::-1]

    # write gif
    with imageio.get_writer(os.path.join(out_dir, 'wave.gif'), mode='I', duration=1./frame_rate) as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            writer.append_data(image)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment