Last active
December 7, 2016 01:42
-
-
Save SethTisue/960016 to your computer and use it in GitHub Desktop.
Parses an iTunes library XML file and compiles listening stats. Shows how to use Scala's XML pull API by layering a small DSL on top of it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/tunes.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// nicer API/DSL for dealing with XMLEventReader | |
/** Small cursor-style wrapper around an `XMLEventReader` over `source`.
  *
  * Each `start`/`end` call consumes exactly one event and pattern-matches it;
  * if the event is not of the expected shape the match fails with a
  * `MatchError`, so these calls double as assertions about document structure.
  */
class Reader(source: io.Source) {
  import scala.xml.pull._

  // preserveWS is switched off (presumably so whitespace-only text between
  // elements is not reported as events -- confirm against scala-xml docs).
  private val xml =
    new XMLEventReader(source) { override val preserveWS = false }

  // Buffered so the upcoming event can be inspected without consuming it.
  private val events: BufferedIterator[XMLEvent] = xml.buffered

  /** Consumes a start-element event and returns its label. */
  def start(): String =
    events.next() match {
      case EvElemStart(_, label, _, _) => label
    }

  /** Consumes a start-element event whose label must equal `s`. */
  def start(s: String): Unit =
    events.next() match {
      case EvElemStart(_, label, _, _) if s == label => ()
    }

  /** Consumes an end-element event and returns its label. */
  def end(): String =
    events.next() match {
      case EvElemEnd(_, label) => label
    }

  /** Consumes an end-element event whose label must equal `s`. */
  def end(s: String): Unit =
    events.next() match {
      case EvElemEnd(_, label) if s == label => ()
    }

  /** Consumes the run of consecutive text events at the cursor and returns
    * their contents concatenated (the empty string if there are none).
    */
  // it would be nice to use BufferedIterator.takeWhile here, but it advances the iterator one too
  // far; see https://issues.scala-lang.org/browse/SI-3581 - ST 7/15/10
  def text(): String = {
    val collected = new StringBuilder
    var inText = true
    while (inText) {
      // Peek first; only consume the event once we know it is text.
      events.head match {
        case EvText(chunk) =>
          events.next()
          collected.append(chunk)
        case _ =>
          inText = false
      }
    }
    collected.toString
  }

  /** Consumes one whole element (start, text, end) and returns its text. */
  def slurp(): String = {
    start()
    val body = text()
    end()
    body
  }

  /** Like `slurp()`, but the element's label must equal `s`. */
  def slurp(s: String): String = {
    start(s)
    val body = text()
    end(s)
    body
  }

  /** True when the upcoming (unconsumed) event is any end-element event. */
  def atEnd: Boolean =
    events.head match {
      case _: EvElemEnd => true
      case _            => false
    }

  /** True when the upcoming (unconsumed) event is the end of element `s`. */
  def atEnd(s: String): Boolean =
    events.head match {
      case EvElemEnd(_, label) if s == label => true
      case _                                 => false
    }

  /** Stops the underlying event reader. */
  def stop(): Unit = xml.stop()
}
/// read the XML file | |
/** One track record extracted from the library file.
  *
  * @param name       value of the "Name" entry
  * @param artist     value of "Sort Artist" when present, otherwise "Artist"
  * @param album      value of "Album", or "" when absent
  * @param time       value of "Total Time" (presumably milliseconds: the
  *                   reports treat 1200000 as 20 minutes)
  * @param plays      value of "Play Count", or 0 when absent
  * @param lastPlayed value of "Play Date", or 0 when absent
  */
case class Track(
  name: String,
  artist: String,
  album: String,
  time: Long,
  plays: Int,
  lastPlayed: Long)
/** Streams the tracks found under the top-level "Tracks" key of the plist
  * document read from `source`.
  *
  * Parsing happens inside `foreach`, so nothing is read until the result is
  * traversed. Because `source` is consumed while parsing, the returned
  * collection is only meant to be traversed once.
  *
  * Entries lacking a "Name", an artist ("Sort Artist" or "Artist") or a
  * "Total Time" are not passed to the callback; they are printed unless
  * `skippable` says they can be ignored.
  *
  * @param source character source positioned at the start of the XML document
  * @return a traversable whose `foreach` drives the parse
  */
def getTracks(source: io.Source): Traversable[Track] =
  new Traversable[Track] {
    override def foreach[T](fn: Track => T): Unit = {
      val reader = new Reader(source)
      import reader._
      // try/finally so the event reader is stopped even when parsing or the
      // callback throws (previously stop() was skipped on any exception).
      try {
        start("plist")
        start("dict")
        // Skip top-level key/value pairs until the "Tracks" key is consumed.
        while (slurp("key") != "Tracks")
          slurp()
        start("dict")
        while (!atEnd) {
          // Each track dict is preceded by a key whose text is not needed.
          slurp("key")
          start("dict")
          val entries = {
            val temp = collection.mutable.Map[String, String]()
            while (!atEnd("dict"))
              temp += slurp("key") -> slurp()
            end("dict")
            temp.toMap
          }
          // Build the track from Option lookups rather than catching
          // NoSuchElementException around the callback: that catch also
          // swallowed NoSuchElementExceptions thrown by `fn` itself.
          val track: Option[Track] =
            for {
              name      <- entries.get("Name")
              artist    <- entries.get("Sort Artist").orElse(entries.get("Artist"))
              totalTime <- entries.get("Total Time")
            } yield Track(
              name = name,
              artist = artist,
              album = entries.getOrElse("Album", ""),
              time = totalTime.toLong,
              plays = entries.getOrElse("Play Count", "0").toInt,
              lastPlayed = entries.getOrElse("Play Date", "0").toLong)
          track match {
            case Some(t) =>
              fn(t)
            case None =>
              // Report incomplete entries unless they are a known non-music kind.
              if (!skippable(entries))
                println(entries)
          }
        }
      } finally {
        reader.stop()
      }
    }
  }
/** Decides whether a library entry that could not be turned into a `Track`
  * may be ignored silently.
  *
  * An entry is skippable when it has no "Kind", when its "Kind" ends with
  * " app" or " book" or equals "Book", or when its "Genre" is "Podcast".
  * A missing "Genre" simply means "not a podcast"; it used to raise
  * `NoSuchElementException`.
  *
  * @param entries key/value pairs of one library entry
  * @return true if the entry can be ignored without reporting it
  */
def skippable(entries: Map[String, String]): Boolean =
  entries.get("Kind") match {
    case None =>
      true
    case Some(kind) =>
      kind.endsWith(" app") ||
        kind.endsWith(" book") ||
        kind == "Book" ||
        entries.get("Genre") == Some("Podcast")
  }
/** Traverses every track from `getTracks(source)` and aggregates statistics.
  *
  * Returns six immutable maps, in this order:
  *  1. "artist - name"  -> time * plays of the track (a later track with the
  *     same key replaces the earlier value)
  *  2. artist           -> summed time * plays
  *  3. artist           -> summed time
  *  4. "artist - album" -> summed time * plays
  *  5. "artist - album" -> summed time
  *  6. artist           -> largest lastPlayed seen
  */
def read(source: io.Source) = {
  // `val a, b = e` evaluates `e` once per name, so these are six distinct maps.
  val trackNames, artists, artistLengths, albums, albumLengths, playDates =
    collection.mutable.Map.empty[String, Long]

  // Kept as a local method so a debugging early-exit (see the commented-out
  // line at the bottom of the loop) can `return` out of it.
  def tally(): Unit = {
    getTracks(source).foreach { track =>
      val listened = track.time * track.plays
      val albumKey = track.artist + " - " + track.album

      trackNames(track.artist + " - " + track.name) = listened
      artists(track.artist) = listened + artists.getOrElse(track.artist, 0L)
      artistLengths(track.artist) = track.time + artistLengths.getOrElse(track.artist, 0L)
      albums(albumKey) = listened + albums.getOrElse(albumKey, 0L)
      albumLengths(albumKey) = track.time + albumLengths.getOrElse(albumKey, 0L)

      // Remember the most recent play date per artist.
      playDates.get(track.artist) match {
        case Some(latest) if latest >= track.lastPlayed => ()
        case _ => playDates(track.artist) = track.lastPlayed
      }
      // if(artists.size >= 200) return
    }
  }

  tally()
  (trackNames.toMap, artists.toMap, artistLengths.toMap, albums.toMap, albumLengths.toMap, playDates.toMap)
}
/// reports | |
/** Prints artists ranked by total listening time, each with its share of the
  * overall listening time as a percentage truncated to three decimals.
  *
  * When the overall listening time is zero every share is printed as 0.0%
  * instead of failing with a division by zero.
  *
  * @param artists artist -> total listening time
  */
def artistReport1(artists: Map[String, Long]): Unit = {
  println("ARTISTS (TOTAL LISTENING TIME)")
  val totalTime = artists.map(_._2).sum
  artists.toList.sortBy(_._2).reverse.zipWithIndex.foreach {
    case ((artist, time), index) =>
      // Integer arithmetic first, then /1000.0, keeps three decimals of a percent.
      val percent =
        if (totalTime == 0L) 0.0
        else (100000 * time / totalTime) / 1000.0
      println((index + 1) + ". " + artist + " - " + percent + "%")
  }
}
/** Prints artists whose total track length exceeds 1200000, ranked by
  * listening time divided by total length, highest ratio first.
  *
  * @param artists       artist -> total listening time
  * @param artistLengths artist -> total length of the artist's tracks
  */
def artistReport2(artists: Map[String, Long], artistLengths: Map[String, Long]): Unit = {
  println("ARTISTS (TOTAL LISTENING TIME VS. LENGTH, 20 MIN MIN)")
  val ratios = artistLengths.toList.collect {
    case (artist, length) if length > 1200000 =>
      (artist, artists(artist) / length.toDouble)
  }
  val ranked = ratios.sortBy(_._2).reverse
  ranked.zipWithIndex.foreach {
    case ((artist, _), index) =>
      println(s"${index + 1}. $artist")
  }
}
/** Prints albums with a positive listening time, longest-listened first.
  *
  * @param albums album key -> total listening time
  */
def albumReport1(albums: Map[String, Long]): Unit = {
  println("ALBUMS (TOTAL LISTENING TIME)")
  val listened = albums.toList.filter { case (_, time) => time > 0 }
  // Stable descending sort: ties keep their original relative order.
  val ranked = listened.sortBy(_._2)(Ordering[Long].reverse)
  ranked.zipWithIndex.foreach {
    case ((album, _), index) =>
      println(s"${index + 1}. $album")
  }
}
/** Prints every album with a positive listening time, ranked by listening
  * time divided by album length, highest ratio first.
  *
  * @param albums       album key -> total listening time
  * @param albumLengths album key -> total length of the album's tracks
  */
def albumReport2(albums: Map[String, Long], albumLengths: Map[String, Long]): Unit = {
  println("ALL ALBUMS (LISTENING TIME VS. LENGTH)")
  val ratios = albums.toList.collect {
    case (album, time) if time > 0 =>
      (album, time.toDouble / albumLengths(album))
  }
  val ranked = ratios.sortBy(_._2).reverse
  ranked.zipWithIndex.foreach {
    case ((album, _), index) =>
      println(s"${index + 1}. $album")
  }
}
/** Prints albums that have a positive listening time and a total length above
  * 600000, ranked by listening time divided by length, highest ratio first.
  *
  * @param albums       album key -> total listening time
  * @param albumLengths album key -> total length of the album's tracks
  */
def albumReport3(albums: Map[String, Long], albumLengths: Map[String, Long]): Unit = {
  println("FULL ALBUMS (LISTENING TIME VS. LENGTH)")
  val fullAlbums = albums.toList.filter {
    case (album, time) => time > 0 && albumLengths(album) > 600000
  }
  val ratios = fullAlbums.map {
    case (album, time) => (album, time.toDouble / albumLengths(album))
  }
  val ranked = ratios.sortBy(_._2).reverse
  ranked.zipWithIndex.foreach {
    case ((album, _), index) =>
      println(s"${index + 1}. $album")
  }
}
/** Prints at most the 1000 tracks with the largest positive listening time,
  * longest-listened first.
  *
  * @param trackNames track key -> total listening time
  */
def trackReport(trackNames: Map[String, Long]): Unit = {
  println("TRACKS (TOTAL LISTENING TIME)")
  val listened = trackNames.toList.filter { case (_, time) => time > 0 }
  val top = listened.sortBy(_._2).reverse.take(1000)
  top.zipWithIndex.foreach {
    case ((track, _), index) =>
      println(s"${index + 1}. $track")
  }
}
/** Prints two lists: artists with a positive last-played value, most recent
  * first, and then (after a blank line) the artists whose last-played value
  * is 0, in alphabetical order.
  *
  * @param playDates artist -> most recent play date (0 meaning never played)
  */
def lastPlayedReport(playDates: Map[String, Long]): Unit = {
  val (played, rest) = playDates.toList.partition { case (_, date) => date > 0 }

  println("RECENTLY LISTENED ARTISTS")
  played.sortBy(_._2).reverse.zipWithIndex.foreach {
    case ((artist, _), index) =>
      println(s"${index + 1}. $artist")
  }

  println()
  println("NEVER LISTENED ARTISTS")
  // Only exact zeros count as "never"; negative values appear in neither list.
  val never = rest.collect { case (artist, date) if date == 0 => artist }
  never.sorted.foreach(artist => println(artist))
}
/// do it! | |
// Location of the exported iTunes library to analyse.
val path = "/Users/tisue/Dropbox/Archive/iTunes/iTunes Library.xml"

// Read everything up front. `read` returns fully built immutable maps, so the
// file can be closed as soon as it returns (it was previously never closed).
val (trackNames, artists, artistLengths, albums, albumLengths, playDates) = {
  // Decode explicitly rather than with the platform default charset.
  // NOTE(review): assumes the library export is UTF-8 -- confirm.
  val source = io.Source.fromFile(path, "UTF-8")
  try read(source)
  finally source.close()
}

// Print each report, separated by a blank line.
artistReport1(artists)
println()
artistReport2(artists, artistLengths)
println()
albumReport1(albums)
println()
albumReport2(albums, albumLengths)
println()
albumReport3(albums, albumLengths)
println()
trackReport(trackNames)
println()
lastPlayedReport(playDates)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment