Skip to content

Instantly share code, notes, and snippets.

@iRevive
Created May 30, 2017 11:29
Show Gist options
  • Save iRevive/8184399d98ea27c242887c32c10fc224 to your computer and use it in GitHub Desktop.
Save iRevive/8184399d98ea27c242887c32c10fc224 to your computer and use it in GitHub Desktop.
VK music parsing
import java.util.regex.Pattern
import net.logstash.logback.encoder.org.apache.commons.lang.StringEscapeUtils
import scala.io.Source
import scala.util.control.NonFatal
/** Command-line tool that pulls "band - song" pairs out of `data-audio="..."` attributes
  * found in an input file and writes them, sorted by band, to an output file.
  *
  * Usage: `Main <input file path> <output file path>`
  */
object Main {

  /** Separator used to split the unescaped attribute value: a comma next to a double quote. */
  private val FieldSeparator = """(,\")|(\",)"""

  /** Matches a `data-audio="..."` attribute; the named group `text` captures its raw value. */
  private val DataAudioPattern = Pattern.compile("data-audio=\\\"(?<text>.*)\\\"")

  /** Entry point.
    *
    * @param args exactly two elements: the input file path and the output file path;
    *             any other shape aborts with a `RuntimeException` raised by `sys.error`
    */
  def main(args: Array[String]): Unit =
    args.toList match {
      case input :: output :: Nil =>
        println(s"Input: [$input]. Output: [$output]")
        process(input, output)
      case _ =>
        sys.error("Invalid args format. Please, pass 2 args: 1) input file path 2) output file path")
    }

  /** Reads `input`, extracts one (band, song) pair per line that carries a `data-audio`
    * attribute, sorts the pairs by band and writes them to `output` as `band - song`,
    * one per line, terminated by `\r\n`.
    *
    * Lines without a `data-audio` attribute are skipped. Lines whose attribute value has
    * fewer than four fields are written as `error - <original line>` so they can be
    * inspected by hand.
    *
    * The input is read and closed completely before the output file is opened, so a
    * failure while reading does not truncate an existing output file.
    *
    * @param input  path of the file to read
    * @param output path of the file to (over)write
    */
  def process(input: String, output: String): Unit = {
    import java.io._

    val source = Source.fromFile(new File(input))
    val tracks =
      try {
        source
          .getLines()
          .flatMap(line => parseLine(line))
          .toList
          .sortBy { case (band, _) => band }
      } finally {
        // The original never closed the source, leaking the file handle.
        source.close()
      }

    val pw = new PrintWriter(new File(output))
    try {
      tracks.foreach { case (band, song) =>
        pw.write(s"$band - $song")
        pw.write("\r\n")
      }
    } finally {
      pw.close()
    }
  }

  /** Extracts a (band, song) pair from a single line.
    *
    * @return `None` when the line has no `data-audio` attribute; otherwise the pair built
    *         from fields 3 and 2 of the unescaped attribute value (each with its first
    *         character dropped), or `("error", line)` when those fields are missing
    */
  private def parseLine(line: String): Option[(String, String)] = {
    val matcher = DataAudioPattern.matcher(line)
    if (!matcher.find()) {
      None
    } else {
      val text   = StringEscapeUtils.unescapeHtml(matcher.group("text"))
      val fields = text.split(FieldSeparator)
      // An explicit length check replaces catching the index-out-of-bounds exception.
      if (fields.length > 3) Some((fields(3).drop(1), fields(2).drop(1)))
      else Some(("error", line))
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment