Created
May 30, 2017 11:29
-
-
Save iRevive/8184399d98ea27c242887c32c10fc224 to your computer and use it in GitHub Desktop.
VK music parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.regex.Pattern | |
import net.logstash.logback.encoder.org.apache.commons.lang.StringEscapeUtils | |
import scala.io.Source | |
import scala.util.control.NonFatal | |
/**
 * Command-line tool that pulls "band - song" pairs out of a saved HTML page.
 *
 * Every input line containing a `data-audio="..."` attribute is parsed, the
 * resulting pairs are sorted by band and written to the output file, one
 * `band - song` entry per line, terminated with `\r\n`.
 *
 * Usage: `Main <input file path> <output file path>`
 */
object Main {

  /** Separator used to split the unescaped `data-audio` payload: either `,"` or `",`. */
  private val FieldSeparator = """(,\")|(\",)"""

  /**
   * Captures the value of the `data-audio` attribute in the named group `text`.
   *
   * NOTE(review): `.*` is greedy, so the capture runs up to the LAST double quote
   * on the line, not the attribute's closing quote. Only fields 2 and 3 of the
   * split payload are read, which presumably come before any trailing markup;
   * confirm before tightening the pattern.
   */
  private val DataAudioPattern = Pattern.compile("data-audio=\\\"(?<text>.*)\\\"")

  /**
   * Entry point. Expects exactly two arguments: the input file path and the
   * output file path. Any other argument count aborts via `sys.error`.
   */
  def main(args: Array[String]): Unit =
    args.toList match {
      case input :: output :: Nil =>
        println(s"Input: [$input]. Output: [$output]")
        process(input, output)
      case _ =>
        sys.error("Invalid args format. Please, pass 2 args: 1) input file path 2) output file path")
    }

  /**
   * Reads `input`, extracts one (band, song) pair per line that carries a
   * `data-audio` attribute, and writes the pairs to `output` sorted by band.
   *
   * Lines whose payload cannot be split into enough fields are not dropped:
   * they are emitted as `error - <original line>` so they can be inspected.
   *
   * @param input  path of the file to read
   * @param output path of the file to write (created or overwritten)
   */
  def process(input: String, output: String): Unit = {
    import java.io._

    // The writer is opened first, exactly as before, so the output file is
    // created/truncated even when opening the input subsequently fails.
    val writer = new PrintWriter(new File(output))
    try {
      val source = Source.fromFile(new File(input))
      try {
        source
          .getLines()
          .flatMap(line => parseLine(line).toList)
          .toList
          .sortBy { case (band, _) => band }
          .foreach { case (band, song) =>
            writer.write(s"$band - $song")
            writer.write("\r\n")
          }
      } finally {
        // Previously the Source was never closed, leaking the file handle.
        source.close()
      }
    } finally {
      writer.close()
    }
  }

  /**
   * Parses a single input line.
   *
   * @return `None` when the line has no `data-audio` attribute; otherwise
   *         `Some((band, song))`, or `Some(("error", line))` when the payload
   *         splits into fewer than four fields.
   */
  private def parseLine(line: String): Option[(String, String)] = {
    val matcher = DataAudioPattern.matcher(line)
    if (!matcher.find()) None
    else {
      val payload = StringEscapeUtils.unescapeHtml(matcher.group("text"))
      val fields = payload.split(FieldSeparator)
      // Field 3 is used as the band and field 2 as the song. The first character
      // of each is dropped (presumably a leading quote left over from the split).
      if (fields.length > 3) Some((fields(3).drop(1), fields(2).drop(1)))
      else Some(("error", line))
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment