Created
January 25, 2020 14:33
-
-
Save kolja/06044345e7c7d0f39839db9be0c4a21f to your computer and use it in GitHub Desktop.
Subtitle Editor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
"exec" "plk" "-Sdeps" "{:deps {org.clojure/tools.cli {:mvn/version \"0.4.2\"}}}" "-Ksf" "$0" "$@" | |
(ns subtitles.core | |
(:require | |
[planck.http :as http] | |
[planck.core :as core] | |
[clojure.tools.cli :refer [parse-opts]] | |
[clojure.string :as s])) | |
(def help
  "Usage text printed by -main when the -h/--help option is given."
  "
SUBTITLE EDITOR (.srt)
----------------------
Reads a subtitle.srt file from stdin and makes changes to it
usage:
-e <ms> show subtitles earlier (milliseconds)
-l <ms> show subtitles later (milliseconds)
-t <from:to> translate subtitles from <language> to <language>
using google translate
-r <from:to> only translate subtitles that are in the range
-h display this help screen
example:
cat subtitles.srt | sub.cljs -t en:zh -r 1:100 > chinese.srt\n")
(def cli-options
  "Option specs handed to clojure.tools.cli/parse-opts.

  -e / -l  millisecond offsets, parsed as base-10 integers (default 0)
  -t       \"from:to\" language pair, parsed into a seq of two strings
  -r       \"from-id:to-id\" range, parsed into a vector of two integers
           (default [] meaning no range was given)
  -h       help flag"
  (let [parse-int #(js/parseInt % 10)
        ;; Reject values that parseInt could not read, so a NaN never reaches
        ;; the timestamp arithmetic; parse-opts records the message in :errors.
        numeric   [#(not (js/isNaN %)) "must be a whole number of milliseconds"]]
    [["-e" "--earlier <milliseconds>" "show subtitles earlier <milliseconds>"
      :default 0
      :parse-fn parse-int
      :validate numeric]
     ["-l" "--later <milliseconds>" "postpone subtitles <milliseconds>"
      :default 0
      :parse-fn parse-int
      :validate numeric]
     ["-t" "--translate <from:to>" "translate <from:to>"
      ;; re-matches yields nil on malformed input, so the result is then empty.
      :parse-fn #(rest (re-matches #"([\w-]+):([\w-]+)" %))]
     ["-r" "--range <from-id:to-id>" "only translate subtitles that are in the range"
      :default []
      :parse-fn #(mapv parse-int (rest (re-matches #"(\d+):(\d+)" %)))]
     ["-h" "--help" "display this help screen"]]))
(defn to-timestamp
  "Formats a duration in milliseconds as an SRT timestamp \"HH:MM:SS,mmm\".

  total - number of milliseconds; values below zero are clamped to 0 so that
          shifting subtitles earlier never yields negative fields.

  The millisecond field is always zero-padded to three digits."
  [total]
  (let [total   (max 0 total)
        ms      (rem total 1000)
        hours   (quot total 3600000)
        ;; minutes and seconds are the remainders within the hour / minute
        minutes (- (quot total 60000) (* 60 hours))
        seconds (- (quot total 1000) (* 60 minutes) (* 3600 hours))]
    (goog.string.format "%02d:%02d:%02d,%03d" hours minutes seconds ms)))
(defn to-ms
  "Converts a timestamp string of the form \"HH:MM:SS,mmm\" into milliseconds.

  Each captured field is multiplied by its weight in milliseconds and the
  products are summed. A string that does not match the pattern yields 0,
  because there are then no fields to sum."
  [timestamp]
  (let [[_ & fields] (re-matches #"(\d{2})\:(\d{2})\:(\d{2}),(\d{1,3})" timestamp)
        weights      [3600000 60000 1000 1]]
    (apply + (map * weights fields))))
(defn parse
  "Folds one line of a subtitle entry into the entry map `el`.

  line - a single input line that is not an entry id
  el   - the entry map built so far (may be nil)

  A line of the form \"<start> --> <end>\" sets :time to a vector of the two
  timestamps converted with to-ms; surrounding whitespace (including a
  trailing carriage return) is ignored. Every other line, blank lines
  included, is appended to the :text vector."
  [line el]
  ;; The pattern is anchored because the lazy groups would otherwise let the
  ;; regex stop early and re-matches would reject the partial match.
  (if-let [[_ start end] (re-matches #"^\s*(.*?)\s+-->\s+(.*?)\s*$" line)]
    (assoc el :time (mapv to-ms [start end]))
    (update el :text (fnil conj []) line)))
(defn agg
  "Reducing function that groups input lines into a vector of entry maps.

  acc  - vector of entries accumulated so far
  line - the next input line

  A line made up solely of digits starts a new entry {:id line}; any other
  line is merged into the most recent entry via parse. Lines that appear
  before the first id are dropped, since there is no entry to attach them to.
  A leading byte-order mark and a trailing carriage return are stripped first
  so that BOM-prefixed or CRLF input is handled.

  NOTE: a subtitle text line consisting only of digits cannot be told apart
  from an entry id here and will start a new entry."
  [acc line]
  (let [line (-> line
                 (s/replace #"^\uFEFF" "")
                 (s/replace #"\r$" ""))]
    (cond
      (re-matches #"^\d+$" line) (conj acc {:id line})
      ;; nothing to pop yet: pop on an empty vector would throw
      (empty? acc)               acc
      ;; peek/pop are constant time on vectors, unlike last
      :else                      (conj (pop acc) (parse line (peek acc))))))
(defn translate
  "Translates `text` through the translate.googleapis.com endpoint.

  text - the string to translate
  from - source language code, sent as the `sl` query parameter
  to   - target language code, sent as the `tl` query parameter

  The first element of the JSON response is read as a list of segments whose
  first item is the translated string; all segments are concatenated so that
  text spanning several sentences or lines is not cut off after the first
  one. When the response holds no such segments, `text` is returned as is."
  [text from to]
  ;; supported languages: https://www.labnol.org/code/19899-google-translate-languages
  (let [uri      (-> (goog.uri.utils.setPath "https://translate.googleapis.com/" "translate_a/single")
                     (goog.uri.utils.appendParamsFromMap (clj->js {:client "gtx"
                                                                   :sl from
                                                                   :tl to
                                                                   :dt "t"
                                                                   :q text})))
        response (js->clj (.parse js/JSON (:body (http/get uri))))
        segments (when (vector? response) (first response))
        pieces   (keep #(when (vector? %) (first %)) segments)]
    (if (seq pieces)
      (s/join pieces)
      text)))
(defn -main
  "Command-line entry point.

  With -h/--help, prints the help text and returns without touching stdin.
  Otherwise reads subtitle entries from stdin, shifts every timestamp by
  (later - earlier) milliseconds, and prints the entries to stdout. When a
  language pair was given with -t, the text of each entry is translated;
  if a range was given with -r, only entries whose numeric id lies within it
  (inclusive) are translated."
  [& args]
  (let [opts (:options (parse-opts args cli-options))]
    (if (:help opts)
      (println help)
      (let [offset                 (- (get opts :later) (get opts :earlier))
            [src-lang target-lang] (get opts :translate)
            ;; an absent or malformed range destructures to nil / nil
            [from to]              (get opts :range)
            translate?             (and src-lang target-lang)
            ;; ids are kept as strings, so compare them numerically
            in-range?              (fn [id]
                                     (or (nil? from)
                                         (<= from (js/parseInt id 10) to)))
            cues                   (reduce agg [] (core/line-seq core/*in*))]
        (doseq [{:keys [id text] [start end] :time} cues]
          (println id)
          (println (to-timestamp (+ start offset))
                   "-->"
                   (to-timestamp (+ end offset)))
          (let [subtitle (s/join "\n" text)]
            (println (if (and translate? (in-range? id))
                       (translate subtitle src-lang target-lang)
                       subtitle))))))))

;; Register -main as the function Planck invokes with the command-line args.
(set! *main-cli-fn* -main)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment