Clojure/babashka script to help automate some of my video editing pipeline
#!/usr/bin/env bb
(ns vidwiz.main
  "This is a prototype script for automating a portion of my video editing using ffmpeg."
  ;; `sh` runs an external program and returns a map with :exit, :out and :err.
  (:require [clojure.java.shell :refer [sh]]
            [clojure.string :as st]
            [cheshire.core :refer [parse-string]]))
;; util
(defn get-extension
  "Returns the trailing extension of `fname` including the leading dot
  (e.g. \".edn\" for \"props.edn\"), or nil when `fname` is nil or has no
  extension.

  The match is anchored to the end of the name, so a dotted name such as
  \"my.clip.mov\" yields \".mov\" rather than its first dotted segment."
  [fname]
  ;; some->> short-circuits on nil so a missing command-line argument does
  ;; not blow up inside re-find.
  (some->> fname
           (re-find #"\.[A-Za-z\d+]+$")))
;; thanks to Burin (@burinc) for an improved impl.
(defn get-resolution
  "Probes the video file `fname` with ffprobe and returns `[width height]`.

  ffprobe is asked for JSON output, which is parsed with keywordized keys.
  Returns nil when no stream entry is found in the probe output."
  [fname]
  (when-let [{:keys [width height]}
             (-> (sh "ffprobe"
                     ;; NOTE(review): these selection flags limit the report to
                     ;; the first video stream's width/height - confirm they
                     ;; match the probe that was originally intended.
                     "-v" "error"
                     "-select_streams" "v:0"
                     "-show_entries" "stream=width,height"
                     "-of" "json"
                     fname)
                 :out
                 (parse-string true)
                 :streams
                 first)]
    [width height]))
(defn overlay-offsets
  "Computes the `[x y]` offset at which an overlay should be placed on a base
  frame.

  Reads from the props map:
  - `:border`       map whose `:width` is added on every side of the overlay
  - `:base-dims`    `[width height]` of the base frame
  - `:overlay-dims` `[width height]` of the overlay, excluding the border
  - `:pos`          `{:h h :v v}` where h is :l, :c or :r and v is :t, :c or :b
  - `:gap`          distance kept from the chosen edge

  A position keyword outside those sets yields nil for that axis."
  [{:keys [border base-dims overlay-dims pos gap]}]
  (let [border-w        (:width border)
        ;; overlay size including the border on both sides
        [box-w box-h]   (map (fn [dim] (+ (* 2 border-w) dim)) overlay-dims)
        [base-w base-h] base-dims
        x (case (:h pos)
            :l gap
            :c (- (/ base-w 2) (/ box-w 2))
            :r (- base-w gap box-w)
            nil)
        y (case (:v pos)
            :t gap
            :c (- (/ base-h 2) (/ box-h 2))
            :b (- base-h gap box-h)
            nil)]
    [x y]))
(defn get-bg-color
  "Samples the background colour of the video `fname` and returns it as a
  `#RRGGBB`-style string (nil if no colour could be read).

  Steps:
  - ffmpeg writes a 4x4 crop taken at offset (100, 500) of the first frame to
    a temporary PNG named after `fname`
  - `convert` reduces that PNG to a single colour
  - `identify -verbose` is scanned from its \"Histogram\" line onwards for the
    first hex colour
  - the temporary PNG is removed"
  [fname]
  (let [ext    (get-extension fname)
        ;; fall back to appending when the name has no extension to swap out
        nfname (if ext
                 (st/replace fname ext ".png")
                 (str fname ".png"))]
    (sh "ffmpeg" "-i" fname
        "-frames:v" "1"
        "-filter_complex"
        (str "[0:v]crop=4:4:100:500")
        "-y" (str nfname))
    (sh "convert" nfname "-colors" "1" nfname)
    (let [col (->> (sh "identify" "-verbose" nfname)
                   :out
                   str
                   st/split-lines
                   (drop-while #(not (st/includes? % "Histogram")))
                   ;; first line at/after the histogram header carrying a colour
                   (some #(re-find #"\#......" %)))]
      (sh "rm" nfname)
      col)))
(defn crop-pad-screen
  "A multi-step transformation for screen recording footage.
  The following sequence of transforms is handled using ffmpeg's 'filter_complex':
  - crop and pad screen recording
  - cut screen footage into left side and right side
  - create a 1920x1080 image with the background color as the fill
  - stitch left and right side back together
  - overlay stitched screen recording onto the bg image with calculated offset values

  Takes a props map with:
  - `:fname`  path of the screen recording
  - `:left`   `{:width w :offset o}`, offset measured from the left edge
  - `:right`  `{:width w :offset o}`, offset measured from the right edge
  - `:gap` and `:pos`, which are passed through to `overlay-offsets`"
  [{:keys [fname left right] :as m}]
  (let [[w h] (get-resolution fname)
        props (merge m {:border {:width 0 :color ""}
                        :base-dims [1920 1080]
                        :overlay-dims [(+ (:width left) (:width right)) h]})
        [ow oh] (overlay-offsets props)
        col (get-bg-color fname)]
    (sh "ffmpeg"
        "-i" fname
        "-f" "lavfi"
        "-i" (str "color=" col ":s=1920x1080")
        "-filter_complex"
        (str "[0:v]crop=" (:width left) ":" h ":" (:offset left) ":0[l];"
             "[0:v]crop=" (:width right) ":" h ":" (- w (:width right) (:offset right)) ":0[r];"
             ;; join the two crops side by side; this produces the [scr]
             ;; label consumed by the overlay step below
             "[l][r]hstack=inputs=2[scr];"
             "[1:v][scr]overlay=" ow ":" oh ":shortest=1")
        ;; NOTE(review): the output path below is an empty string, which ffmpeg
        ;; cannot write to - supply the intended output filename (it must match
        ;; the :scrf value later given to overlay-camera).
        "-c:a" "copy" "-y" "")))
(defn clap-time
  "Find time in seconds at which a clap is detected in the audio stream of fname.
  The detection assumes that a clap sound exists within the first 12 seconds of a given clip.

  Runs ffmpeg's `silencedetect` filter over the first 12 seconds and returns
  the first reported `silence_end` timestamp as a double, or nil when none is
  reported."
  [fname]
  (some->> (sh "ffmpeg" "-i" fname
               "-ss" "00:00:00" "-t" "00:00:12"
               "-af" "silencedetect=noise=0.5:d=0.01"
               "-f" "null" "-")
           ;; NOTE(review): silencedetect reports through ffmpeg's log, which is
           ;; expected on stderr - confirm :err is the stream to read here.
           :err
           st/split-lines
           (drop-while #(not (st/includes? % "silence_end:")))
           first
           (re-find #"silence_end: .+")
           (re-find #"\d+\.\d+")
           Double/parseDouble))
(defn overlay-camera
  "Composes the final footage by overlaying the camera footage onto the screen footage according to given properties.
  The composition is handled using ffmpeg's 'filter_complex', and several actions occur:
  - overlays camera footage with border onto screen footage
  - given screen footage, camera footage, and border width and color create combined video
  - calculate camera delay using clap times in footage. assumes screen recording is longer than cam
  - calculate size of border for camera
  - create border as a solid color frame
  - scale camera down to given overlay-dims
  - overlay camera onto border frame
  - overlay bordered camera onto screen footage with calculated offsets

  Takes a props map with:
  - `:camf`         path of the camera footage
  - `:scrf`         path of the screen footage
  - `:border`       `{:width w :color c}` of the frame drawn around the camera
  - `:overlay-dims` `[width height]` of the camera excluding the border
  - `:gap` and `:pos`, which are passed through to `overlay-offsets`"
  [{:keys [border overlay-dims camf scrf] :as props}]
  (let [{:keys [width color]} border
        ;; size of the solid-colour frame: camera dims plus border on each side
        [cw ch] (map #(+ (* 2 width) %) overlay-dims)
        [ow oh] (overlay-offsets (assoc props :fname scrf
                                        :base-dims (get-resolution scrf)))
        ;; shift the camera so that both clips' claps line up
        delay (- (clap-time scrf) (clap-time camf))]
    (sh "ffmpeg"
        "-i" scrf
        "-i" camf
        "-f" "lavfi"
        "-i" (str "color=" color ":s=" cw "x" ch)
        "-filter_complex"
        (str "[1:v]scale=" (apply str (interpose "x" overlay-dims)) "[scv];"
             "[2:v][scv]overlay=" width ":" width ":shortest=1[cam];"
             "[cam]setpts=PTS-STARTPTS+" delay "/TB[dcam];"
             "[0:v][dcam]overlay=" ow ":" oh ":shortest=1")
        ;; NOTE(review): the output path below is an empty string, which ffmpeg
        ;; cannot write to - supply the intended output filename.
        "-c:a" "copy" "-y" "")))
(defn fix-audio
  "Fixes issue where mono audio track plays only to the Left channel.

  `fname` is given to ffmpeg twice: the video stream is copied unchanged from
  the first input, and the audio is taken from the second input after being
  panned to mono from its front-left channel."
  [fname]
  (sh "ffmpeg" "-i" fname
      "-i" fname "-af" "pan=mono|c0=FL"
      ;; NOTE(review): the output path below is an empty string, which ffmpeg
      ;; cannot write to - supply the intended output filename.
      "-c:v" "copy" "-map" "0:v:0" "-map" "1:a:0" ""))
;; Example props file. The form is discarded by the reader (#_); evaluate the
;; inner (spit ...) by hand to generate a starting props.edn.
;; `main` reads the :screen map for crop-pad-screen and the :camera map for
;; overlay-camera.
;; NOTE(review): the filename values below are empty strings - fill in real
;; paths before use.
#_(spit "props.edn"
        {:screen
         {:fname ""
          :left {:width 667 :offset 0} ;; offset from left side
          :right {:width 750 :offset 0} ;; offset from right side
          :gap 100
          :pos {:h :l :v :c}} ;; :h can be [:l :c :r] :v can be [:t :c :b]
         :camera
         {:camf ""
          :scrf "" ;; this is the hardcoded output filename from (crop-pad-screen fname)
          :border {:width 7 :color "cyan"}
          :overlay-dims [480 270] ;; dims of camera excluding border
          :gap 70
          :pos {:h :r :v :b}}})
(defn main
  "Main runs when vidwiz is run as a script.
  You can run this program with babashka:
  - chmod +x vidwiz.clj
  - ./vidwiz.clj props.edn

  Reads the props file named by the first command-line argument (it must have
  an .edn extension), then runs crop-pad-screen on its :screen map,
  overlay-camera on its :camera map, and finally fix-audio. Does nothing when
  no .edn file is given."
  []
  (let [fname (first *command-line-args*)
        ;; guard against a missing argument before inspecting the extension
        props (when (and fname (= (get-extension fname) ".edn"))
                ;; NOTE(review): read-string is the general reader, not the
                ;; EDN-only one - only point this at props files you trust.
                (read-string (slurp fname)))]
    (when props
      (crop-pad-screen (:screen props))
      (overlay-camera (:camera props))
      ;; NOTE(review): the input path below is an empty string - it should be
      ;; the file written by overlay-camera.
      (fix-audio ""))))

;; Invoke main only when this file is the script babashka was started with,
;; so loading the namespace from a REPL or another file has no side effects.
(when (= *file* (System/getProperty "babashka.file"))
  (main))
Comment can be moved to ns docstring.

Comment blocks before function declarations can be moved to function docstrings (if they document those functions).

Serioga, thank you for pointing this out. A simple change, but definitely a good one for better code style and clarity. I appreciate it.

burinc commented Feb 20, 2021

I found that the get-resolution function can match something other than what is intended.
Since Babashka ships with a JSON library (cheshire, in this case), I refactored the code as follows:

(require '[cheshire.core :refer [parse-string]])
(defn get-resolution
  "Probes `fname` with ffprobe (JSON output) and returns `[width height]`,
  or nil when no stream entry is found."
  [fname]
  (when-let [{:keys [width height]}
             (-> (sh "ffprobe"
                     ;; NOTE(review): selection flags reconstructed - confirm
                     ;; they match the probe that was originally posted.
                     "-v" "error"
                     "-select_streams" "v:0"
                     "-show_entries" "stream=width,height"
                     "-of" "json"
                     fname)
                 :out
                 (parse-string true)
                 :streams
                 first)]
    [width height]))

And it seems to be working better

@burinc, thanks for your contribution! I've updated the gist with your implementation. Gave it a try in my terminal and it's a nice improvement for sure :).

