Created
May 9, 2021 13:20
-
-
Save bsless/2a0737989ba31a5b808fe68c3a8f834e to your computer and use it in GitHub Desktop.
Babashka wrapper for youtube-dl, mainly for parallelism
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns ytdl | |
(:require | |
[babashka.fs :as fs] | |
[babashka.process :refer [process]] | |
[cheshire.core :as json] | |
[clojure.core.async :as async] | |
[clojure.edn :as edn] | |
[clojure.java.io :as io] | |
[clojure.tools.cli :refer [parse-opts]] | |
[clojure.string :as str])) | |
(def boolean-options
  "Set of option keywords that `prepare-options` renders as bare
  `--flag` switches (no value) when their value in the opts map is truthy.
  The section comments are only a reading aid."
  #{;; general
    :help :version :update :ignore-errors :abort-on-error
    :dump-user-agent :list-extractors :extractor-descriptions
    :force-generic-extractor :ignore-config :flat-playlist
    :mark-watched :no-mark-watched :no-color
    ;; network / geo
    :force-ipv4 :force-ipv6 :geo-bypass :no-geo-bypass
    ;; selection
    :no-playlist :yes-playlist :include-ads
    ;; download
    :skip-unavailable-fragments :abort-on-unavailable-fragment
    :keep-fragments :no-resize-buffer :playlist-reverse :playlist-random
    :xattr-set-filesize :hls-prefer-native :hls-prefer-ffmpeg :hls-use-mpegts
    ;; filesystem
    :id :restrict-filenames :no-overwrites :continue :no-continue
    :no-part :no-mtime :write-description :write-info-json
    :write-annotations :no-cache-dir :rm-cache-dir
    ;; thumbnails
    :write-thumbnail :write-all-thumbnails :list-thumbnails
    ;; verbosity / simulation
    :quiet :no-warnings :simulate :skip-download
    :get-url :get-title :get-id :get-thumbnail :get-description
    :get-duration :get-filename :get-format
    :dump-json :dump-single-json :print-json :newline :no-progress
    :console-title :verbose :dump-pages :write-pages :print-traffic
    :call-home :no-call-home
    ;; workarounds
    :no-check-certificate :prefer-insecure :bidi-workaround
    ;; formats
    :all-formats :prefer-free-formats :list-formats
    :youtube-skip-dash-manifest
    ;; subtitles
    :write-sub :write-auto-sub :all-subs :list-subs
    ;; authentication
    :netrc :ap-list-mso
    ;; post-processing
    :extract-audio :keep-video :no-post-overwrites :embed-subs
    :embed-thumbnail :add-metadata :xattrs :prefer-avconv :prefer-ffmpeg})
(def options
  "Set of option keywords that `prepare-options` renders as a
  `--name value` pair when their value in the opts map is truthy."
  #{:default-search :config-location
    :proxy :socket-timeout :source-address
    :geo-verification-proxy :geo-bypass-country :geo-bypass-ip-block
    :playlist-start :playlist-end :playlist-items
    :match-title :reject-title :max-downloads
    :min-filesize :max-filesize
    :date :datebefore :dateafter
    :min-views :max-views :match-filter :age-limit :download-archive
    :limit-rate :retries :fragment-retries :buffer-size :http-chunk-size
    :external-downloader :external-downloader-args
    :batch-file :output :output-na-placeholder :autonumber-start
    :load-info-json :cookies :cache-dir
    :encoding :user-agent :referer :add-header
    :sleep-interval :max-sleep-interval
    :format :merge-output-format
    :sub-format :sub-lang
    :username :password :twofactor :video-password
    :ap-mso :ap-username :ap-password
    :audio-format :audio-quality :recode-video :postprocessor-args
    :metadata-from-title :fixup :ffmpeg-location :exec :convert-subs})
(def extractor->url-format
  "Maps a lower-cased extractor name to a `format` pattern; the `%s`
  slot is filled with an entry's `:url` value by `video-entry->url`."
  {"youtube" "https://youtube.com/watch?v=%s"})
(defonce
  ^{:doc "Agent that log actions are `send`-ed to (see `log` and `-println`),
  so messages are written one at a time. Its state is not used."}
  logger
  (agent nil))
(defmacro -log
  "Expands to a `println` on `*err*` of `msg`, prefixed with
  \"<file>:<line>:<column>\" of the call site. The position is captured at
  macro-expansion time from the form metadata and `*file*`."
  [& msg]
  (let [{:keys [line column]} (meta &form)
        source-file *file*]
    `(binding [*out* *err*] ;; or bind to (io/writer log-file)
       (println (str ~source-file ":" ~line ":" ~column) ~@msg))))
(defn -println
  "Queues a `-log` call on the `logger` agent. The collected `args` seq is
  passed to `-log` as a single value. Returns the agent."
  [& args]
  (let [emit (fn [_state] (-log args))]
    (send logger emit)))
(defmacro log
  "Expands to a `send` on the `logger` agent whose action prints `args` to
  `*err*`, prefixed with \":<line>:<column>\" of the call site (captured at
  macro-expansion time). The expansion evaluates to the agent."
  [& args]
  (let [form-meta (meta &form)
        line (:line form-meta)
        column (:column form-meta)]
    `(send logger
           (fn [~'_]
             (binding [*out* *err*] ;; or bind to (io/writer log-file)
               (println (str ":" ~line ":" ~column) ~@args))))))
(defn logxf
  "Returns a pass-through transducer: every item is logged with `msg` as
  prefix and then handed on unchanged."
  [msg]
  (let [tap (fn [item]
              (log msg item)
              item)]
    (map tap)))
(defn parse
  "Parses the JSON string `s` with cheshire, passing `true` as the key-fn
  argument so object keys come back as keywords."
  [s]
  (-> s
      (json/parse-string true)))
(defn list-videos!
  "Runs `youtube-dl --dump-json --ignore-errors --flat-playlist
  --skip-download s`, waits for the process to finish and returns its
  captured stdout as a string."
  [s]
  (log "listing videos from" s)
  (let [cmd ["youtube-dl"
             "--dump-json"
             "--ignore-errors"
             "--flat-playlist"
             "--skip-download"
             s]]
    (-> (process cmd {:out :string})
        deref
        :out)))
(defn find-extractor
  "Returns the lower-cased extractor name of `entry`, taken from
  `:extractor_key` or, when that is absent, from `:ie_key`."
  [entry]
  (let [{:keys [extractor_key ie_key]} entry
        extractor (or extractor_key ie_key)]
    (str/lower-case extractor)))
(defn video-entry->url
  "Builds the URL to download for `entry`.

  When `extractor->url-format` has a pattern for the entry's extractor,
  the entry's `:url` value is substituted into it. Otherwise the `:url`
  value is returned unchanged instead of calling `format` with a nil
  pattern (which throws a NullPointerException).
  NOTE(review): the fallback assumes entries from other extractors carry a
  directly usable URL in `:url` - confirm per extractor."
  [entry]
  (let [raw (:url entry)]
    (if-let [pattern (get extractor->url-format (find-extractor entry))]
      (format pattern raw)
      raw)))
(defn ensure-file
  "Makes sure the file `f` exists, creating it (and any missing parent
  directories) when it does not. Returns `f`."
  [f]
  (when-not (fs/exists? f)
    ;; A bare relative name such as \"archive.txt\" has no parent component;
    ;; only create directories when there is one.
    (when-let [dir (fs/parent f)]
      (fs/create-dirs dir))
    (fs/create-file f))
  f)
(defn load-archive
  "Reads the archive file `f` (created first via `ensure-file` if missing)
  and returns the set of its lines."
  [f]
  (let [path (ensure-file f)]
    (with-open [rdr (io/reader path)]
      ;; `set` realizes the lazy line-seq before the reader is closed.
      (set (line-seq rdr)))))
(defn list-playlists
  "Pipes playlist identifiers from channel `in` through `list-videos!`
  (one at a time) and puts each listing string on channel `out`.
  Returns the channel produced by `async/pipeline-async`.

  The per-item result channel is always closed, even when the listing
  yields nothing: `pipeline-async` waits for that close, so skipping it
  would stall the whole pipeline."
  [in out]
  (async/pipeline-async
   1
   out
   (fn [v o]
     (async/go
       ;; `list-videos!` blocks on an external process, so run it on a
       ;; dedicated thread. If it throws, the thread channel closes and the
       ;; take yields nil; nil must never be put on a channel.
       (when-some [listing (async/<! (async/thread (list-videos! v)))]
         (log "ret val:" listing)
         (async/>! o listing))
       (async/close! o)))
   in))
(defn entry->archive-key
  "Returns the archive line for `entry`: its extractor name, a space, and
  its `:url` value."
  [entry]
  (let [extractor (find-extractor entry)
        id (:url entry)]
    (str extractor " " id)))
(defn process-url-xf
  "Returns a transducer that turns listing strings (one JSON document per
  line) into parsed entries, dropping entries whose archive key is already
  contained in the `archive` set.

  Blank lines are removed before parsing so that empty process output does
  not reach the JSON parser and end up as a nil entry further down."
  [archive]
  (let [archived? (fn [entry]
                    (let [k (entry->archive-key entry)
                          hit? (contains? archive k)]
                      (when hit?
                        (log "key in archive:" k))
                      hit?))]
    (comp
     (logxf "before split")
     (mapcat str/split-lines)
     (remove str/blank?)
     (map parse)
     (logxf "entry:")
     (remove archived?))))
(defn process-urls
  "Connects channel `in` to channel `out` through `process-url-xf` for the
  given `archive` set, using `async/pipeline` with parallelism 1.
  Returns the pipeline's channel."
  [in out archive]
  (async/pipeline 1 out (process-url-xf archive) in))
(defn prepare-options
  "Builds the command vector for youtube-dl from the `opts` map.

  Starts with \"youtube-dl\", then a `--name` switch for every truthy key
  found in `boolean-options`, then a `--name` / stringified value pair for
  every truthy key found in `options`. Other keys are ignored."
  [opts]
  (let [flag (fn [k] (str "--" (name k)))
        switches (for [[k v] (select-keys opts boolean-options)
                       :when v]
                   (flag k))
        pairs (for [[k v] (select-keys opts options)
                    :when v]
                [(flag k) (str v)])]
    (-> ["youtube-dl"]
        (into switches)
        (into cat pairs))))
(defn youtube-dl
  "Starts a youtube-dl process for `url` and returns what `process` returns.

  `opts` is turned into command-line arguments by `prepare-options`;
  `popts` is handed to `process` as its options map. Both default to {}."
  ([url]
   (youtube-dl url {} {}))
  ([url opts]
   (youtube-dl url opts {}))
  ([url opts popts]
   (let [cmd (-> opts
                 prepare-options
                 (conj url))]
     (log "args:" cmd)
     (process cmd popts))))
(defn download-entry
  "Downloads the video described by `entry` and waits for youtube-dl to
  finish. `opts` and `popts` are passed through to `youtube-dl`.

  Returns the entry's archive key when the process exited with code 0, and
  nil otherwise, so that failed downloads are not recorded as done."
  [entry opts popts]
  (let [url (video-entry->url entry)]
    (log "downloading url" url)
    (let [{:keys [exit error]} @(youtube-dl url opts popts)]
      (log "finished downloading url" url)
      ;; The finished process map reports the outcome under :exit; the
      ;; original :error check is kept as an additional guard.
      (if (and (not error) (= 0 exit))
        (entry->archive-key entry)
        (do (log "download failed for url" url "exit code:" exit)
            nil)))))
(defn download-videos!!
  "Downloads entries taken from channel `from` with up to `n` parallel
  blocking workers and puts the archive key of each downloaded entry on
  channel `to`. `opts` and `popts` are passed through to `download-entry`.
  Returns the channel produced by `async/pipeline-blocking`."
  [n from to opts popts]
  (async/pipeline-blocking
   n
   to
   ;; `keep` rather than `map`: `download-entry` may return nil, and nil
   ;; cannot be placed on a channel.
   (keep (fn [entry] (download-entry entry opts popts)))
   from))
(defn re-archive
  "Starts a thread that appends every key taken from channel `from` as one
  line to file `f`, flushing after each line. The loop ends when `from`
  is closed or when a write fails (the error is logged).
  Returns the channel of the `async/thread`."
  [from f]
  (async/thread
    (with-open [w (io/writer f :append true)]
      (loop []
        (when-let [k (async/<!! from)]
          ;; The write result is reduced to a boolean so that `recur` sits
          ;; outside the `try`; recurring from inside a `try` is rejected
          ;; by the JVM Clojure compiler.
          (let [written? (try
                           (doto w
                             (.write k)
                             (.write "\n")
                             (.flush))
                           true
                           (catch Exception e
                             (log "error:" e)
                             false))]
            (when written?
              (recur))))))))
(defn main
  "Wires up the download pipeline and starts it.

  `playlists` is a collection of playlist identifiers, `n` the number of
  parallel downloads (default 1), `archive-file` the archive path (default
  \"archive.txt\"), `opts` the youtube-dl options map and `popts` the
  process options map (both default {}).

  Stages, connected by channels of buffer size 1:
  playlists -> `list-playlists` -> `process-urls` -> `download-videos!!`
  -> `re-archive`. Returns the value of the `list-playlists` call."
  ([playlists]
   (main 1 playlists))
  ([n playlists]
   (main n "archive.txt" playlists))
  ([n archive-file playlists]
   (main n archive-file playlists {}))
  ([n archive-file playlists opts]
   (main n archive-file playlists opts {}))
  ([n archive-file playlists opts popts]
   (let [source-ch (async/to-chan playlists)
         seen (load-archive archive-file)
         done-ch (async/chan 1)
         entry-ch (async/chan 1)
         listing-ch (async/chan 1)]
     ;; Consumers are started before producers, from the sink backwards.
     (re-archive done-ch archive-file)
     (download-videos!! n entry-ch done-ch opts popts)
     (process-urls listing-ch entry-ch seen)
     (list-playlists source-ch listing-ch))))
(defn maybe-url
  "Returns a `java.net.URL` built from `s`, or nil when construction
  throws an Exception."
  [s]
  (try
    (new java.net.URL s)
    (catch Exception _ignored
      nil)))
(def cli-options
  "Option specs for `clojure.tools.cli/parse-opts`.

  --playlists accepts, in this order of precedence: a path to an existing
  file (one playlist per line), a single URL, or an edn value. The result
  is always a collection of playlists: a URL stays the string that was
  given, and an edn string literal is wrapped in a vector instead of being
  treated as a sequence of characters."
  [["-n" "--threads THREADS" "Number of parallel video downloads"
    :default 1
    :parse-fn #(Integer/parseInt %)
    :validate [#(< 0 % 17) "Must be a number between 1 and 16"]]
   ["-a" "--archive ARCHIVE" "archive file"
    :default "archive.txt"
    :parse-fn #(ensure-file %)]
   ["-p" "--playlists PLAYLISTS" "playlists, file or edn sequence"
    :parse-fn #(cond
                 (fs/exists? %) (str/split-lines (slurp %))
                 (maybe-url %) [%]
                 :else (let [v (edn/read-string %)]
                         (if (string? v) [v] v)))
    :validate [#(and (coll? %) (seq %))
               "Must be a non-empty file, a URL, or a non-empty edn collection"]]
   ["-o" "--options OPTIONS" "youtube-dl options"
    :default {}
    :parse-fn #(edn/read-string %)]
   ["-P" "--process-options PROCESS-OPTIONS" "process builder options"
    :default {}
    :parse-fn #(edn/read-string %)]
   ["-h" "--help"]])
(defn -main
  "Command-line entry point: parses `args` with `cli-options` and starts
  `main` with the parsed values.

  Prints the option summary and returns nil when --help is given. Throws
  an ex-info carrying the messages when option parsing reported errors,
  and an AssertionError when no playlists were supplied."
  [& args]
  (let [{:keys [errors summary] parsed :options} (parse-opts args cli-options)
        ;; The parsed values live under :options of the parse result; the
        ;; youtube-dl options are, confusingly, the :options key inside it.
        {:keys [threads archive playlists process-options help]
         ytdl-options :options} parsed]
    (cond
      help
      (println summary)

      (seq errors)
      (throw (ex-info (str/join \newline errors) {:errors errors}))

      :else
      (do
        (assert playlists "Need something to download")
        (main threads archive playlists ytdl-options process-options)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment