Skip to content

Instantly share code, notes, and snippets.

@bsless
Created May 9, 2021 13:20
Show Gist options
  • Save bsless/2a0737989ba31a5b808fe68c3a8f834e to your computer and use it in GitHub Desktop.
Save bsless/2a0737989ba31a5b808fe68c3a8f834e to your computer and use it in GitHub Desktop.
Babashka wrapper for youtube-dl, mainly for parallelism
(ns ytdl
(:require
[babashka.fs :as fs]
[babashka.process :refer [process]]
[cheshire.core :as json]
[clojure.core.async :as async]
[clojure.edn :as edn]
[clojure.java.io :as io]
[clojure.tools.cli :refer [parse-opts]]
[clojure.string :as str]))
;; Set of option keywords treated as on/off switches.
;; `prepare-options` selects these keys from the user-supplied option map and,
;; for each one whose value is truthy, emits a bare `--<name>` argument
;; (no value follows the flag).
(def boolean-options
#{:help
:version
:update
:ignore-errors
:abort-on-error
:dump-user-agent
:list-extractors
:extractor-descriptions
:force-generic-extractor
:ignore-config
:flat-playlist
:mark-watched
:no-mark-watched
:no-color
:force-ipv4
:force-ipv6
:geo-bypass
:no-geo-bypass
:no-playlist
:yes-playlist
:include-ads
:skip-unavailable-fragments
:abort-on-unavailable-fragment
:keep-fragments
:no-resize-buffer
:playlist-reverse
:playlist-random
:xattr-set-filesize
:hls-prefer-native
:hls-prefer-ffmpeg
:hls-use-mpegts
:id
:restrict-filenames
:no-overwrites
:continue
:no-continue
:no-part
:no-mtime
:write-description
:write-info-json
:write-annotations
:no-cache-dir
:rm-cache-dir
:write-thumbnail
:write-all-thumbnails
:list-thumbnails
:quiet
:no-warnings
:simulate
:skip-download
:get-url
:get-title
:get-id
:get-thumbnail
:get-description
:get-duration
:get-filename
:get-format
:dump-json
:dump-single-json
:print-json
:newline
:no-progress
:console-title
:verbose
:dump-pages
:write-pages
:print-traffic
:call-home
:no-call-home
:no-check-certificate
:prefer-insecure
:bidi-workaround
:all-formats
:prefer-free-formats
:list-formats
:youtube-skip-dash-manifest
:write-sub
:write-auto-sub
:all-subs
:list-subs
:netrc
:ap-list-mso
:extract-audio
:keep-video
:no-post-overwrites
:embed-subs
:embed-thumbnail
:add-metadata
:xattrs
:prefer-avconv
:prefer-ffmpeg})
;; Set of option keywords that carry a value.
;; `prepare-options` selects these keys from the user-supplied option map and,
;; for each one whose value is truthy, emits two arguments:
;; `--<name>` followed by the value converted with `str`.
(def options
#{:default-search
:config-location
:proxy
:socket-timeout
:source-address
:geo-verification-proxy
:geo-bypass-country
:geo-bypass-ip-block
:playlist-start
:playlist-end
:playlist-items
:match-title
:reject-title
:max-downloads
:min-filesize
:max-filesize
:date
:datebefore
:dateafter
:min-views
:max-views
:match-filter
:age-limit
:download-archive
:limit-rate
:retries
:fragment-retries
:buffer-size
:http-chunk-size
:external-downloader
:external-downloader-args
:batch-file
:output
:output-na-placeholder
:autonumber-start
:load-info-json
:cookies
:cache-dir
:encoding
:user-agent
:referer
:add-header
:sleep-interval
:max-sleep-interval
:format
:merge-output-format
:sub-format
:sub-lang
:username
:password
:twofactor
:video-password
:ap-mso
:ap-username
:ap-password
:audio-format
:audio-quality
:recode-video
:postprocessor-args
:metadata-from-title
:fixup
:ffmpeg-location
:exec
:convert-subs})
;; Lower-cased extractor name -> `format` template for a watch URL.
;; The single `%s` slot is filled with the entry's :url value by
;; `video-entry->url`.
(def extractor->url-format
  (hash-map "youtube" "https://youtube.com/watch?v=%s"))
;; Agent that log output is funnelled through: `log` and `-println` `send`
;; printing functions to it instead of printing directly. The agent's state
;; itself is not read anywhere in this file.
(defonce logger (agent nil))
(defmacro -log
  "Expands to a synchronous `println` on `*err*` of `msg`, prefixed with
  `<file>:<line>:<column>` of the call site. The file is the value of
  `*file*` at macro-expansion time; line and column come from the metadata
  of the calling form."
  [& msg]
  (let [{:keys [line column]} (meta &form)
        _ns (ns-name *ns*) ;; can also be used for logging
        source-file *file*]
    `(binding [*out* *err*] ;; or bind to (io/writer log-file)
       (println (str ~source-file ":" ~line ":" ~column) ~@msg))))
(defn -println
  "Function counterpart of `log`: queues a line on the `logger` agent that
  prints `args` to `*err*` the way `println` would (space separated).

  The arguments are rendered with `print-str` before being handed to `-log`;
  passing the rest-args seq straight through would print it as a single
  parenthesised list, e.g. `(a b)` instead of `a b`.

  Note that the location prefix added by `-log` is that of this function's
  body, not of the caller. Returns the agent (the result of `send`)."
  [& args]
  (send logger (fn [_] (-log (apply print-str args)))))
(defmacro log
  "Expands to a `send` on the `logger` agent whose action prints `args` to
  `*err*`, prefixed with `:<line>:<column>` of the call site (the file name
  is intentionally left out of the prefix, so it starts with a colon).
  Evaluates to the agent; the printing itself happens asynchronously."
  [& args]
  (let [position (meta &form)
        line (:line position)
        column (:column position)]
    `(send logger
           (fn [~'_]
             (binding [*out* *err*] ;; or bind to (io/writer log-file)
               (println (str ":" ~line ":" ~column) ~@args))))))
(defn logxf
  "Returns a pass-through transducer: every item is logged together with
  `msg` and then forwarded unchanged."
  [msg]
  (map (fn [item]
         (log msg item)
         item)))
(defn parse
  "Parses the JSON string `s` with cheshire, turning object keys into
  keywords."
  [s]
  (-> s
      (json/parse-string true)))
(defn list-videos!
  "Runs `youtube-dl --dump-json --ignore-errors --flat-playlist
  --skip-download s`, waits for it to finish and returns everything it wrote
  to stdout as a single string. Blocks the calling thread."
  [s]
  (log "listing videos from" s)
  (let [cmd ["youtube-dl"
             "--dump-json"
             "--ignore-errors"
             "--flat-playlist"
             "--skip-download"
             s]
        finished @(process cmd {:out :string})]
    (:out finished)))
(defn find-extractor
  "Returns the lower-cased extractor name of `entry`, taken from
  :extractor_key or, failing that, :ie_key.

  Returns nil when the entry carries neither key; calling `str/lower-case`
  on nil would otherwise throw a NullPointerException."
  [entry]
  (some-> (or (:extractor_key entry)
              (:ie_key entry))
          str/lower-case))
(defn video-entry->url
  "Returns the URL to hand to youtube-dl for `entry`.

  Resolution order:
  1. :webpage_url, when the entry has one;
  2. :url as-is, when it is already an absolute http(s) URL;
  3. the extractor's template from `extractor->url-format` filled with :url;
  4. :url unchanged, when no template is known for the extractor.

  Steps 1, 2 and 4 avoid calling `format` with a nil template (which throws)
  and avoid wrapping an already complete URL inside the template."
  [entry]
  (let [page (:webpage_url entry)
        url (:url entry)]
    (cond
      page page
      (and (string? url) (re-find #"^https?://" url)) url
      :else (if-let [template (get extractor->url-format (find-extractor entry))]
              (format template url)
              url))))
(defn ensure-file
  "Makes sure the file `f` exists, creating it (and any missing parent
  directories) when it does not. Returns `f` unchanged.

  A bare relative name such as \"archive.txt\" has no parent path component,
  so directory creation is skipped in that case instead of being attempted
  on nil."
  [f]
  (when-not (fs/exists? f)
    (when-let [dir (fs/parent f)]
      (fs/create-dirs dir))
    (fs/create-file f))
  f)
(defn load-archive
  "Reads the archive file `f` (created first via `ensure-file` if needed)
  and returns its lines as a set of strings. The whole file is consumed
  before the reader is closed."
  [f]
  (let [path (ensure-file f)]
    (with-open [rdr (io/reader path)]
      (set (line-seq rdr)))))
(defn list-playlists
  "Pipeline stage: takes playlist identifiers from channel `in`, lists each
  one with `list-videos!` and puts the raw listing output on channel `out`.
  One playlist is listed at a time. Returns the channel produced by
  `async/pipeline-async`.

  The blocking `list-videos!` call runs on a dedicated thread rather than
  inside a `go` block. When the listing yields nothing (for instance because
  the call threw and its channel closed empty) nothing is forwarded, and the
  per-item result channel is still closed so the pipeline keeps moving."
  [in out]
  (async/pipeline-async
   1
   out
   (fn [v o]
     (async/go
       (let [listing (async/<! (async/thread (list-videos! v)))]
         (log "ret val:" listing)
         ;; putting nil on a channel throws, which would skip the close below
         (when (some? listing)
           (async/>! o listing)))
       (async/close! o)))
   in))
(defn entry->archive-key
  "Builds the archive line for `entry`: the extractor name, one space, and
  the entry's :url value."
  [entry]
  (let [extractor (find-extractor entry)
        id (:url entry)]
    (str extractor " " id)))
(defn process-url-xf
  "Returns a transducer that turns raw listing output (one string holding
  one JSON document per line) into parsed entries, dropping every entry
  whose archive key is already contained in the set `archive`.

  Blank lines are skipped before parsing: an empty listing splits into a
  single empty string, which parses to nil and would otherwise fail further
  down the chain."
  [archive]
  (comp
   (logxf "before split")
   (map str/split-lines)
   cat
   (remove str/blank?)
   (map parse)
   (logxf "entry:")
   (remove
    (fn [entry]
      (let [k (entry->archive-key entry)]
        (if (contains? archive k)
          (do (log "key in archive:" k) true)
          false))))))
(defn process-urls
  "Pipeline stage: applies the transducer from `process-url-xf` (built for
  the set `archive`) to everything arriving on `in` and puts the results on
  `out`, with a parallelism of 1. Returns the channel produced by
  `async/pipeline`."
  [in out archive]
  (async/pipeline 1 out (process-url-xf archive) in))
(defn prepare-options
  "Builds the youtube-dl command vector for the option map `opts`.

  The result starts with \"youtube-dl\", followed by a bare `--name` for
  every key in `boolean-options` with a truthy value, followed by a
  `--name` / value pair for every key in `options` with a truthy value
  (values are converted with `str`). Keys in neither set are ignored."
  [opts]
  (let [flag-name (fn [k] (str "--" (name k)))
        switches (select-keys opts boolean-options)
        valued (select-keys opts options)
        switch-args (keep (fn [[k v]] (when v (flag-name k))) switches)
        valued-args (mapcat (fn [[k v]] (when v [(flag-name k) (str v)])) valued)]
    (-> ["youtube-dl"]
        (into switch-args)
        (into valued-args))))
(defn youtube-dl
  "Starts a youtube-dl process for `url` and returns the process object
  without waiting for it.

  `opts` is the option map understood by `prepare-options` (default {});
  `popts` is passed through to `process` as its options (default {})."
  ([url]
   (youtube-dl url {}))
  ([url opts]
   (youtube-dl url opts {}))
  ([url opts popts]
   (let [cmd (-> opts
                 prepare-options
                 (conj url))]
     (log "args:" cmd)
     (process cmd popts))))
(defn download-entry
  "Downloads the video described by `entry` with youtube-dl, blocking until
  the process has exited.

  Returns the entry's archive key when the download succeeded and nil when
  it failed. Success is judged by the process exit code: the finished
  process map reports its status under :exit, so looking only at an :error
  key would treat every run, including failed ones, as successful."
  [entry opts popts]
  (let [url (video-entry->url entry)]
    (log "downloading url" url)
    (let [res @(youtube-dl url opts popts)
          ;; a missing :exit is treated as success
          exit (:exit res 0)]
      (log "finished downloading url" url)
      (if (and (not (:error res)) (zero? exit))
        (entry->archive-key entry)
        (do (log "download failed for url" url "exit code:" exit)
            nil)))))
(defn download-videos!!
  "Pipeline stage: downloads every entry arriving on channel `from` using up
  to `n` blocking workers and puts the resulting archive keys on channel
  `to`. `opts` and `popts` are forwarded to `download-entry`. Returns the
  channel produced by `async/pipeline-blocking`.

  `keep` is used instead of `map` so that a nil result from
  `download-entry` is dropped rather than offered to the channel, which
  does not accept nil values."
  [n from to opts popts]
  (async/pipeline-blocking
   n
   to
   (keep (fn [entry] (download-entry entry opts popts)))
   from))
(defn re-archive
  "Starts a thread that appends every key taken from channel `from` to the
  file `f`, one per line, flushing after each key. The thread finishes when
  `from` is closed and drained, or after the first failed write (which is
  logged). Returns the channel of that thread, which closes when it ends.

  The write is wrapped in a `try` that yields a success flag, and `recur`
  is issued outside of it: `recur` is not allowed inside a `try` body in
  JVM Clojure (\"Cannot recur across try\")."
  [from f]
  (async/thread
    (with-open [w (io/writer f :append true)]
      (loop []
        (when-let [k (async/<!! from)]
          (let [written? (try
                           (doto w
                             (.write k)
                             (.write "\n")
                             (.flush))
                           true
                           (catch Exception e
                             (log "error:" e)
                             false))]
            (when written?
              (recur))))))))
(defn main
  "Wires up and starts the download pipeline:

    playlists -> list-playlists -> process-urls -> download-videos!! -> re-archive

  Arguments (each shorter arity fills in the defaults shown):
    n            - number of parallel downloads (default 1)
    archive-file - file of already downloaded keys (default \"archive.txt\")
    playlists    - collection of things to list (URLs, ids, search terms)
    opts         - youtube-dl option map, see `prepare-options` (default {})
    popts        - options passed to `process` (default {})

  Returns immediately with a channel that closes once the archive writer
  has finished, i.e. after every stage upstream of it has drained. Take
  from it to wait for the whole run to complete."
  ([playlists]
   (main 1 playlists))
  ([n playlists]
   (main n "archive.txt" playlists))
  ([n archive-file playlists]
   (main n archive-file playlists {}))
  ([n archive-file playlists opts]
   (main n archive-file playlists opts {}))
  ([n archive-file playlists opts popts]
   (let [in (async/to-chan playlists)
         ;; the archive is read before the writer below opens the same file
         archive (load-archive archive-file)
         downloaded (async/chan 1)
         videos (async/chan 1)
         urls (async/chan 1)
         ;; stages are started from the last one to the first
         done (re-archive downloaded archive-file)]
     (download-videos!! n videos downloaded opts popts)
     (process-urls urls videos archive)
     (list-playlists in urls)
     done)))
(defn maybe-url
  "Returns a java.net.URL built from the string `s`, or nil when
  constructing it throws an Exception (for example because `s` is not a
  well-formed URL)."
  [s]
  (try
    (new java.net.URL s)
    (catch Exception _ nil)))
;; Option specifications for clojure.tools.cli/parse-opts.
;;   -n  number of parallel downloads, an integer from 1 to 16
;;   -a  archive file; the file is created when the option is parsed
;;   -p  playlists: an existing file (one item per line), a single URL,
;;       or an EDN value read from the argument
;;   -o  youtube-dl option map, given as EDN
;;   -P  option map passed to the process builder, given as EDN
;;   -h  help flag
(def cli-options
  [["-n" "--threads THREADS" "Number of parallel video downloads"
    :default 1
    :parse-fn (fn [s] (Integer/parseInt s))
    :validate [(fn [n] (< 0 n 17)) "Must be a number between 1 and 16"]]
   ["-a" "--archive ARCHIVE" "archive file"
    :default "archive.txt"
    :parse-fn (fn [path] (ensure-file path))]
   ["-p" "--playlists PLAYLISTS" "playlists, file or edn sequence"
    :parse-fn (fn [arg]
                (let [u (maybe-url arg)]
                  (cond
                    (fs/exists? arg) (str/split-lines (slurp arg))
                    u [u]
                    :else (edn/read-string arg))))
    :validate [(fn [items] (seq items))]]
   ["-o" "--options OPTIONS" "youtube-dl options"
    :default {}
    :parse-fn (fn [s] (edn/read-string s))]
   ["-P" "--process-options PROCESS-OPTIONS" "process builder options"
    :default {}
    :parse-fn (fn [s] (edn/read-string s))]
   ["-h" "--help"]])
(defn -main
  "Command-line entry point. Parses `args` against `cli-options` and runs
  `main` with the result.

  `parse-opts` returns a map with the parsed values under :options (next to
  :errors and :summary), so the individual settings are read from that
  nested map rather than from the top-level result.

  - With --help, prints the option summary and returns.
  - With parse/validation errors, throws an ex-info listing them.
  - Without playlists, fails the same assertion as before.
  - Otherwise starts the pipeline and blocks on the channel `main` returns,
    so the process does not exit as soon as the pipeline has been wired up."
  [& args]
  (let [{parsed :options :keys [errors summary]} (parse-opts args cli-options)
        {:keys [threads archive playlists process-options help]} parsed
        ;; the CLI option named \"options\" holds the youtube-dl option map
        ytdl-options (:options parsed)]
    (cond
      help
      (println summary)

      (seq errors)
      (throw (ex-info (str/join "\n" errors) {:errors errors}))

      :else
      (do
        (assert playlists "Need something to download")
        (async/<!! (main threads archive playlists ytdl-options process-options))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment