Last active
December 21, 2015 08:19
-
-
Save jlandahl/6277757 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sbt build definition for the wot-scala project.
// Settings are separated by blank lines so the file also loads on older sbt
// releases that use blank lines as the delimiter between .sbt expressions.

name := "wot-scala"

version := "0.0.1"

scalaVersion := "2.10.2"

// jsoup: HTML parser (plain Java artifact, hence the single `%`).
libraryDependencies += "org.jsoup" % "jsoup" % "1.7.2"

// Jackson Scala module; `%%` appends the Scala binary version to the artifact name.
libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.1.3"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File | |
import scala.collection.JavaConversions._ | |
import org.jsoup.Jsoup | |
import org.jsoup.nodes.Element | |
/**
 * Parses a locally saved copy of the World of Tanks encyclopedia vehicle
 * listing and prints one `Vehicle` per list entry to standard output.
 *
 * The page is expected to consist of category headings
 * (`.b-encyclopedia-type`), each immediately followed by a sibling element
 * that contains the vehicle entries (`.b-encyclopedia-list_point`) of that
 * category.
 */
object ParseVehicleData extends App {
  // Explicit `.asScala` conversions instead of relying on the implicit
  // JavaConversions views, which are deprecated in later Scala versions.
  import scala.collection.JavaConverters._

  /** Site root that the relative vehicle links are resolved against. */
  val baseURL = "http://worldoftanks.com"

  /** Category heading text as shown on the page -> short category code. */
  val categories = Map(
    "Light Tanks" -> "lt",
    "Medium Tanks" -> "mt",
    "Heavy Tanks" -> "ht",
    "Tank Destroyers" -> "td",
    "SPGs" -> "spg")

  /** Roman-numeral tier label as shown on the page -> numeric tier. */
  val tiers = Map(
    "I" -> 1,
    "II" -> 2,
    "III" -> 3,
    "IV" -> 4,
    "V" -> 5,
    "VI" -> 6,
    "VII" -> 7,
    "VIII" -> 8,
    "IX" -> 9,
    "X" -> 10)

  /**
   * One vehicle entry of the encyclopedia listing.
   *
   * @param id       lower-cased last path segment of the vehicle's link
   * @param name     display name of the vehicle
   * @param category short category code (a value of `categories`)
   * @param tier     numeric tier (a value of `tiers`)
   * @param url      absolute URL of the vehicle's page
   */
  case class Vehicle(id: String, name: String, category: String, tier: Int, url: String)

  /**
   * Builds a [[Vehicle]] from a single `.b-encyclopedia-list_point` element.
   *
   * @param category short category code of the section the entry belongs to
   * @param elem     the list entry element
   * @return the parsed vehicle
   * @throws NoSuchElementException if the entry's tier label is not a key of `tiers`
   */
  def parseVehicle(category: String, elem: Element): Vehicle = {
    val name = elem.select(".b-encyclopedia-list_name").text
    val relativeURL = elem.select(".b-encyclopedia-list_linc").attr("href")
    // `split` yields an empty array for a bare "/", so avoid the partial `.last`.
    val id = relativeURL.split("/").lastOption.getOrElse("").toLowerCase
    val tierText = elem.select(".b-encyclopedia-list_level").text
    val tier = tiers.getOrElse(
      tierText,
      throw new NoSuchElementException(
        s"Unknown tier '$tierText' for vehicle '$name' ($relativeURL)"))
    Vehicle(id, name, category, tier, s"$baseURL$relativeURL")
  }

  val input = new File("wot-encyclopedia-20130819.html")
  // The third argument is the base URI jsoup uses to resolve relative links.
  val doc = Jsoup.parse(input, "UTF-8", s"$baseURL/encyclopedia/vehicles/")

  doc.select(".b-encyclopedia-type").iterator.asScala.foreach { heading =>
    val headingText = heading.text
    val category = categories.getOrElse(
      headingText,
      throw new NoSuchElementException(s"Unknown vehicle category heading '$headingText'"))

    // nextElementSibling returns null when the heading is the last child.
    Option(heading.nextElementSibling) match {
      case Some(vehicleList) =>
        vehicleList.select(".b-encyclopedia-list_point").iterator.asScala.foreach { entry =>
          println(parseVehicle(category, entry))
        }
      case None =>
        Console.err.println(s"No vehicle list follows category heading '$headingText'; skipping")
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment