Created
December 7, 2018 12:40
-
-
Save arberg/8e74274e7d7afc5395537e147f7a93d9 to your computer and use it in GitHub Desktop.
Download from youtube-dl with my custom video quality preference, download subtitles, and convert VTT to SRT with ffmpeg. Written as a Scala Ammonite script, so it needs Ammonite to run.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ammonite.ops._ | |
import ujson.Js.Value | |
import upickle.default.{macroRW, ReadWriter => RW} | |
//import scala.math.Ordering.Implicits._ | |
//https://transform.now.sh/json-to-scala-case-class | |
case class SubLanguage(ext: String, url: String) | |
object SubLanguage { // uPickle 0.7.1 needs these to parse the objects. Really ugly, maybe gson is better? | |
implicit val rw: RW[SubLanguage] = macroRW | |
} | |
case class Formats( | |
// format: String, | |
format_id: String, | |
height: Int, | |
// width: Int, | |
// preference: String, | |
// manifest_url: String, | |
// url: String, | |
// protocol: String, | |
// http_headers: HttpHeaders, | |
ext: String, | |
// vcodec: String, | |
tbr: Int, // total bitrate | |
// acodec: String, | |
// fps: String | |
) extends Ordered[Formats] | |
{ | |
def compareValueFormatId = { | |
val pattern = """.*-(\d+)""".r | |
format_id match { | |
case pattern(number) => -number.toInt | |
case _ => 0 | |
} | |
} | |
def compare(that: Formats): Int = { | |
val aTuple: (Int, Int, Int, Int) = (this.height, this.tbr, formatPreference(this.ext), this.compareValueFormatId) | |
val bTuple: (Int, Int, Int, Int) = (that.height, that.tbr, formatPreference(that.ext), that.compareValueFormatId) | |
implicitly[Ordering[Tuple4[Int, Int, Int, Int]]].compare(aTuple, bTuple) | |
} | |
} | |
object Formats { | |
implicit val rw: RW[Formats] = macroRW | |
} | |
// Upickle fails with AbortException if items are missing | |
case class Subtitles(da: Seq[SubLanguage]/*, en: Seq[SubLanguage]*/) | |
object Subtitles { | |
implicit val rw: RW[Subtitles] = macroRW | |
} | |
case class Thumbnails(id: String, url: String) | |
object Thumbnails { | |
implicit val rw: RW[Thumbnails] = macroRW | |
} | |
//case class HttpHeaders( | |
//"Accept-Encoding": String, | |
//"User-Agent": String, | |
//"Accept-Charset": String, | |
//"Accept-Language": String, | |
//Accept: String, | |
//Cookie: String | |
//) | |
case class YoutubeDl | |
( | |
height: Int, | |
// playlist: String, | |
title: String, | |
formats: Seq[Formats], | |
// description: String, | |
// url: String, | |
// thumbnails: Seq[Thumbnails], | |
// http_headers: HttpHeaders, | |
fps: String, | |
display_id: String, | |
format_id: String, | |
// extractor: String, | |
// duration: Double, | |
// preference: String, | |
// manifest_url: String, | |
// acodec: String, | |
// protocol: String, | |
// id: String, | |
// vcodec: String, | |
// requested_subtitles: String, | |
// ext: String, | |
// subtitles: Subtitles, | |
// webpage_url_basename: String, | |
// playlist_index: String, | |
// upload_date: String, | |
// format: String, | |
// webpage_url: String, | |
// width: Int, | |
// timestamp: Int, | |
// thumbnail: String, | |
// extractor_key: String, | |
tbr: Int | |
) | |
object YoutubeDl{ | |
implicit val rw: RW[YoutubeDl] = macroRW | |
} | |
// Also in AlexFunctions, todo import it instead | |
def basename(d: Path) = { | |
// OR d.name.split("\\.(?=[^\\.]+$)")(0) | |
val n = d.name | |
val extSize = d.ext.size | |
val extSizeWithPeriod = if (extSize > 0) extSize + 1 else 0 | |
d.name.substring(0, d.name.size - extSizeWithPeriod) | |
} | |
def convertSubtitlesAllVttToSrt(path: Path): Unit = { | |
ls(path) |? { | |
_.ext == "vtt" | |
} | { f => | |
val srtName = basename(f) + ".srt" | |
if (!exists(path / srtName)) { | |
%("ffmpeg.exe", "-i", f.name, basename(f) + ".srt")(path) | |
} | |
rm(f) | |
} | |
} | |
// Unused | |
def getBestFormatIdWithuJsonExample(jsonString: String): String = { | |
val tbr="tbr" | |
val ext="ext" | |
val json: Value = ujson.read(jsonString) | |
val formats: ujson.Js.Arr = json("formats").arr | |
var best : Option[ujson.Js] = None | |
for (current <- formats.value) { | |
if (best.isEmpty) { | |
best = Some(current) | |
} else { | |
val theBest=best.get | |
if (current(tbr).num > theBest(tbr).num || | |
(current(tbr).num == theBest(tbr).num && | |
current(ext).str == "mp4" && theBest(ext).str != "mp4") ) { | |
best = Some(current) | |
} | |
} | |
} | |
best.get("format_id").str | |
} | |
def formatPreference(ext: String): Int = { | |
// 3gp, aac, flv, m4a, mp3, mp4, ogg, wav, webm | |
ext match { | |
// video containers | |
case "mkv" => 10 // not possible in youtube-dl | |
case "mp4" => 9 | |
case "webm" => 8 | |
case "flv" => 7 | |
case "3gp" => 6 | |
// audio only | |
case "m4a" => 5 | |
case "aac" => 4 | |
case "ogg" => 3 | |
case "mp3" => 2 | |
case "wav" => 1 | |
case _ => 0 | |
} | |
} | |
def selectBestFormat(a: Formats, b: Formats) = { | |
if (a.compare(b) >= 0) a else b | |
} | |
def getBestFormatId(path: Path, videoUrl: String): String = { | |
val jsonString = %%("youtube-dl", "-J", videoUrl)(path).out.lines(0) | |
val json = upickle.default.read[YoutubeDl](jsonString) // upickle variant | |
val formats: Seq[Formats] = json.formats | |
if (formats.isEmpty) "" | |
else formats.foldLeft(formats.head)(selectBestFormat).format_id | |
} | |
def downloadVideo(path: Path, videoUrl: String): Unit = { | |
// --download-archive : I think it zips and keeps all intermediaries. Might be needed if ffmpeg corrupts stream | |
val formatId = getBestFormatId(path, videoUrl) | |
val p = %%(s"youtube-dl --all-subs --fixup warn -f $formatId $videoUrl".split(" ").toVector)(path) | |
// rm(path/"*temp.mp4") // HowTo, maybe: ls(path) |? {_.name.endsWith("temp.mp4"} | rm | |
println(p.out.lines.mkString("\n")) | |
if (p.exitCode == 0) { | |
println("Success") | |
convertSubtitlesAllVttToSrt(path) | |
} else { | |
println("Failed: " + p.err.lines.mkString("\n")) | |
} | |
} | |
val filmPath = Path("""m:\Film""") | |
val serierJul = filmPath / "Serier Jul" | |
downloadVideo(serierJul / "MoviePath", "https://www.youtube.com/watch?v=_JUwcv7dUQI") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment