Created
March 16, 2019 13:24
-
-
Save jakeybob/ebe2006d4073cc639092412bb2658807 to your computer and use it in GitHub Desktop.
R script for creating spectrogram videos from .wav files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(seewave) | |
library(ggplot2) | |
library(scales) | |
library(foreach) | |
library(doParallel) | |
# Register a parallel backend sized to the number of detected cores.
registerDoParallel(parallel::detectCores())

#### SETUP ####
# Input audio, scratch directory for the rendered frames, and final video path.
file_wav <- "firestarter16.wav"
pic_dir <- "pics"
output_video <- "output.mp4"

# Value passed as `f` to the seewave calls below; also sets the plot's y range.
base_freq <- 44.1e3
# duration that appears on screen at any one time
window_width_secs <- 5
fps <- 24
frame_duration <- 1 / fps
# play with to get desired contrast
db_limits <- c(-100, -20)

# Read the audio and prepend `window_width_secs` of silence to it.
wave <- tuneR::readWave(file_wav)
wave_padded <- addsilw(
  wave = wave,
  f = base_freq,
  at = "start",
  d = window_width_secs
)

# The padded audio is written to disk so each frame can re-read its own slice.
# padded_file_wav <- tempfile(fileext = ".wav")
padded_file_wav <- "temp.wav"
savewav(wave_padded, f = base_freq, filename = padded_file_wav)

# One time stamp per video frame, spanning the whole padded audio.
total_length <- duration(wave_padded, f = base_freq)
start_time_secs <- 0
stop_time_secs <- total_length
time_stamps <- seq(
  from = start_time_secs,
  to = stop_time_secs,
  by = frame_duration
)
#### FFT / SAVE PICS ####
# The frame directory must exist before the workers try to save into it.
dir.create(pic_dir, showWarnings = FALSE, recursive = TRUE)

# Render one spectrogram PNG per time stamp, in parallel.
# Frame i covers the padded audio from time_stamps[i] to
# time_stamps[i] + window_width_secs and is saved as <pic_dir>/pic_<i>.png.
# `.packages` loads the plotting packages on each worker, which is needed on
# backends that do not fork the master session.
foreach(
  i = seq_along(time_stamps),
  .packages = c("seewave", "ggplot2", "scales")
) %dopar% {
  filename <- file.path(pic_dir, paste0("pic_", i, ".png"))
  w <- tuneR::readWave(
    padded_file_wav,
    from = time_stamps[i],
    to = time_stamps[i] + window_width_secs,
    units = "seconds"
  )
  # Progress message: report the start time of the frame being rendered.
  print(paste(time_stamps[i], "secs..."))

  p <- ggspectro(w, f = base_freq, wn = "hanning", ovlp = 0) +
    scale_fill_viridis_c(option = "inferno", limits = db_limits, oob = squish) +
    geom_tile(aes(fill = amplitude)) +
    # y axis runs from 0 to base_freq / 2000
    ylim(0, base_freq / 2000) +
    # centre-line marker, placed in the middle of the visible window
    geom_vline(xintercept = window_width_secs / 2, size = 2) +
    theme_void() +
    theme(
      legend.position = "none",
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      plot.background = element_rect(fill = "black"),
      panel.background = element_rect(fill = "black")
    )

  # 19.2in x 10.8in at 100 dpi gives 1920x1080 pixel frames.
  ggsave(plot = p, filename = filename, width = 19.2, height = 10.8, dpi = 100)
}
#### FFMPEG ####
# command for ffmpeg, including audio offset so audio is in sync with centre-line
# Paths are shell-quoted so names containing spaces or shell metacharacters
# are passed to ffmpeg intact.
command <- paste(
  "ffmpeg -y -r", fps,
  "-f image2 -s 1920x1080 -i", shQuote(file.path(pic_dir, "pic_%d.png")),
  "-itsoffset", window_width_secs / 2,
  "-i", shQuote(file_wav),
  "-vcodec libx264 -crf 25 -pix_fmt yuv420p", shQuote(output_video)
)
ffmpeg_status <- system(command = command)
# Fail loudly on a non-zero exit status instead of carrying on as if the
# video had been written.
if (ffmpeg_status != 0) {
  stop("ffmpeg failed with exit status ", ffmpeg_status, call. = FALSE)
}
#### CLEAN UP ####
# Shut down the implicitly created parallel cluster.
stopImplicitCluster()

# Delete the intermediate padded wav and the directory of rendered frames.
unlink(padded_file_wav)
unlink(pic_dir, recursive = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
spectrogram_video.R
Video
Output video of Firestarter can be seen here (with the original audio), or – if that one disappears – here (with decidedly non-original audio).
A more visually entertaining, but less sonically catchy, video can be seen here. It's from the Aphex Twin track often known as "Formula" or "Equation", but technically called …

Pics
Here's a 5-second snippet of Firestarter (using the viridis inferno colourmap).
Interesting to compare it to the same time period from the youtube audio version – say goodbye to any frequency >15 kHz.
And below is probably the most well-known frequency-space Easter egg from "Formula". It looks somewhat more human on a log scale!
Notes
Requires ffmpeg to be in the system PATH.
Uses doParallel (exhibited 3 – 4× speed-up on a 4 core machine). Also, doing a zillion individual FFTs means the discretization noise is different for every frame, so it has a nice film-grainy effect rather than looking like a sliding static image.