Skip to content

Instantly share code, notes, and snippets.

@arberg
Created December 7, 2018 12:40
Show Gist options
  • Save arberg/8e74274e7d7afc5395537e147f7a93d9 to your computer and use it in GitHub Desktop.
Save arberg/8e74274e7d7afc5395537e147f7a93d9 to your computer and use it in GitHub Desktop.
Download from youtube-dl with my custom video quality preference, download subtitles, and convert vtt to srt with ffmpeg. Written as a Scala Ammonite script, so it needs Ammonite to run.
import ammonite.ops._
import ujson.Js.Value
import upickle.default.{macroRW, ReadWriter => RW}
//import scala.math.Ordering.Implicits._
//https://transform.now.sh/json-to-scala-case-class
/** A single subtitle entry, described by its file extension (`ext`) and its `url`. */
case class SubLanguage(ext: String, url: String)

object SubLanguage {
  // uPickle 0.7.1 needs an explicit ReadWriter in the companion to parse the case class.
  implicit val rw: RW[SubLanguage] = macroRW[SubLanguage]
}
/**
 * One entry of the `formats` list, ordered by download preference.
 *
 * Instances compare lexicographically on
 * `(height, tbr, formatPreference(ext), compareValueFormatId)`; a larger value
 * means a more preferred format.
 *
 * Only the fields that are actually used are mapped; the remaining ones are left
 * commented out.
 */
case class Formats(
  // format: String,
  format_id: String,
  height: Int,
  // width: Int,
  // preference: String,
  // manifest_url: String,
  // url: String,
  // protocol: String,
  // http_headers: HttpHeaders,
  ext: String,
  // vcodec: String,
  tbr: Int // total bitrate
  // acodec: String,
  // fps: String
) extends Ordered[Formats] {

  /**
   * Tie-breaker derived from `format_id`: the negated number that follows the last
   * hyphen, or 0 when the id does not end in `-<digits>`. Because the number is
   * negated, a smaller suffix yields a larger (more preferred) value.
   */
  def compareValueFormatId: Int = {
    val trailingNumber = """.*-(\d+)""".r
    format_id match {
      case trailingNumber(digits) => -digits.toInt
      case _ => 0
    }
  }

  /** Compares the two formats by their preference keys, most significant component first. */
  def compare(that: Formats): Int = {
    def rank(f: Formats): (Int, Int, Int, Int) =
      (f.height, f.tbr, formatPreference(f.ext), f.compareValueFormatId)

    Ordering[(Int, Int, Int, Int)].compare(rank(this), rank(that))
  }
}

object Formats {
  implicit val rw: RW[Formats] = macroRW[Formats]
}
// Upickle fails with AbortException if items are missing
/** Subtitle lists per language field; only `da` is mapped, `en` is currently disabled. */
case class Subtitles(da: Seq[SubLanguage] /*, en: Seq[SubLanguage]*/)

object Subtitles {
  implicit val rw: RW[Subtitles] = macroRW[Subtitles]
}
/** A thumbnail reference consisting of an `id` and the `url` it points to. */
case class Thumbnails(id: String, url: String)

object Thumbnails {
  implicit val rw: RW[Thumbnails] = macroRW[Thumbnails]
}
//case class HttpHeaders(
//"Accept-Encoding": String,
//"User-Agent": String,
//"Accept-Charset": String,
//"Accept-Language": String,
//Accept: String,
//Cookie: String
//)
/**
 * Top-level object that the JSON printed by `youtube-dl -J` is parsed into.
 *
 * Only a subset of the fields is mapped; the others are kept below as comments so
 * they can be re-enabled when needed.
 */
case class YoutubeDl(
  height: Int,
  // playlist: String,
  title: String,
  formats: Seq[Formats],
  // description: String,
  // url: String,
  // thumbnails: Seq[Thumbnails],
  // http_headers: HttpHeaders,
  fps: String,
  display_id: String,
  format_id: String,
  // extractor: String,
  // duration: Double,
  // preference: String,
  // manifest_url: String,
  // acodec: String,
  // protocol: String,
  // id: String,
  // vcodec: String,
  // requested_subtitles: String,
  // ext: String,
  // subtitles: Subtitles,
  // webpage_url_basename: String,
  // playlist_index: String,
  // upload_date: String,
  // format: String,
  // webpage_url: String,
  // width: Int,
  // timestamp: Int,
  // thumbnail: String,
  // extractor_key: String,
  tbr: Int
)

object YoutubeDl {
  implicit val rw: RW[YoutubeDl] = macroRW[YoutubeDl]
}
// Also in AlexFunctions, todo import it instead
/**
 * Returns the last path segment of `d` with its extension (and the separating
 * period) removed. A name without an extension is returned unchanged.
 */
def basename(d: Path): String = {
  // OR d.name.split("\\.(?=[^\\.]+$)")(0)
  val fileName = d.name
  val extension = d.ext
  if (extension.size > 0) {
    // Cut off the extension together with the period in front of it.
    fileName.substring(0, fileName.size - (extension.size + 1))
  } else {
    fileName.substring(0, fileName.size)
  }
}
/**
 * Converts every `.vtt` file directly inside `path` to `.srt` using ffmpeg.
 *
 * ffmpeg is only invoked when the target `.srt` does not exist yet; the `.vtt`
 * file is removed afterwards in either case.
 */
def convertSubtitlesAllVttToSrt(path: Path): Unit = {
  // The listing is filtered up front, so removing files while iterating is safe.
  val vttFiles = ls(path).filter(_.ext == "vtt")
  vttFiles.foreach { vtt =>
    val srtName = basename(vtt) + ".srt"
    val alreadyConverted = exists(path / srtName)
    if (!alreadyConverted) {
      // Runs with `path` as working directory, hence the bare file names.
      %("ffmpeg.exe", "-i", vtt.name, srtName)(path)
    }
    rm(vtt)
  }
}
// Unused
/**
 * Example of selecting a format with plain ujson instead of case classes.
 *
 * Walks the `formats` array of the given JSON document and keeps the entry with
 * the highest `tbr`; on equal `tbr` an `mp4` entry replaces a non-`mp4` one.
 *
 * @param jsonString JSON document containing a `formats` array whose entries
 *                   carry `tbr`, `ext` and `format_id`
 * @return the `format_id` of the selected entry, or "" when `formats` is empty
 *         (previously an empty array failed with `NoSuchElementException`)
 */
def getBestFormatIdWithuJsonExample(jsonString: String): String = {
  val tbr = "tbr"
  val ext = "ext"
  val json: Value = ujson.read(jsonString)
  val formats: ujson.Js.Arr = json("formats").arr

  // A candidate wins on a strictly higher bitrate, or on an equal bitrate when it
  // is mp4 and the current best is not.
  def beats(candidate: Value, incumbent: Value): Boolean = {
    val candidateTbr = candidate(tbr).num
    val incumbentTbr = incumbent(tbr).num
    candidateTbr > incumbentTbr ||
      (candidateTbr == incumbentTbr &&
        candidate(ext).str == "mp4" && incumbent(ext).str != "mp4")
  }

  formats.value
    .reduceLeftOption((best, current) => if (beats(current, best)) current else best)
    .fold("")(best => best("format_id").str)
}
/**
 * Ranks a container/file extension; a higher number is more preferred.
 *
 * Known extensions: 3gp, aac, flv, m4a, mp3, mp4, ogg, wav, webm (plus mkv).
 *
 * @param ext file extension without the leading period (matched case-sensitively)
 * @return a rank from 1 to 10 for known extensions, 0 for anything else
 */
def formatPreference(ext: String): Int = {
  val ranking: Map[String, Int] = Map(
    // video containers
    "mkv" -> 10, // not possible in youtube-dl
    "mp4" -> 9,
    "webm" -> 8,
    "flv" -> 7,
    "3gp" -> 6,
    // audio only
    "m4a" -> 5,
    "aac" -> 4,
    "ogg" -> 3,
    "mp3" -> 2,
    "wav" -> 1
  )
  ranking.getOrElse(ext, 0)
}
/** Returns the more preferred of the two formats; `a` is kept when they compare equal. */
def selectBestFormat(a: Formats, b: Formats): Formats =
  if (a < b) b else a
/**
 * Asks youtube-dl for the metadata of `videoUrl` and picks the preferred format.
 *
 * Runs `youtube-dl -J` with `path` as working directory, parses the first line of
 * its standard output as [[YoutubeDl]] and reduces the listed formats with
 * [[selectBestFormat]].
 *
 * @return the `format_id` of the preferred format, or "" when no formats are listed
 */
def getBestFormatId(path: Path, videoUrl: String): String = {
  val probe = %%("youtube-dl", "-J", videoUrl)(path)
  val metadata = upickle.default.read[YoutubeDl](probe.out.lines(0)) // upickle variant
  metadata.formats
    .reduceLeftOption((best, next) => selectBestFormat(best, next))
    .fold("")(_.format_id)
}
/**
 * Downloads `videoUrl` with all subtitles into `path` using the preferred format,
 * then converts the downloaded `.vtt` subtitles to `.srt`.
 *
 * The standard output of youtube-dl is always printed. On success "Success" is
 * printed and the subtitles are converted; on failure "Failed: " followed by the
 * standard error output is printed and nothing is converted.
 *
 * @param path     working directory the download is executed in
 * @param videoUrl URL handed to youtube-dl
 */
def downloadVideo(path: Path, videoUrl: String): Unit = {
  // --download-archive : I think it zips and keeps all intermediaries. Might be needed if ffmpeg corrupts stream
  val formatId = getBestFormatId(path, videoUrl)

  // Build the argument vector explicitly instead of splitting an interpolated
  // string on spaces: that broke on values containing spaces and passed an empty
  // argument to -f when no format id was found. Without a format id, -f is left
  // out so youtube-dl falls back to its own default selection.
  val formatArgs = if (formatId.isEmpty) Vector.empty[String] else Vector("-f", formatId)
  val command: Vector[String] =
    (Vector("youtube-dl", "--all-subs", "--fixup", "warn") ++ formatArgs) :+ videoUrl

  // %% throws ShelloutException on a non-zero exit code, which made the failure
  // branch below unreachable; recover the command result from the exception.
  val p =
    try %%(command)(path)
    catch { case failure: ShelloutException => failure.result }

  // rm(path/"*temp.mp4") // HowTo, maybe: ls(path) |? {_.name.endsWith("temp.mp4"} | rm
  println(p.out.lines.mkString("\n"))
  if (p.exitCode == 0) {
    println("Success")
    convertSubtitlesAllVttToSrt(path)
  } else {
    println("Failed: " + p.err.lines.mkString("\n"))
  }
}
// Script entry point: hard-coded, machine-specific target directories.
val filmPath = Path("""m:\Film""")
val serierJul = filmPath / "Serier Jul"
// Executed as soon as the script is evaluated: downloads the given video into
// the "MoviePath" directory below serierJul.
downloadVideo(serierJul / "MoviePath", "https://www.youtube.com/watch?v=_JUwcv7dUQI")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment