Skip to content

Instantly share code, notes, and snippets.

@Hajto
Created May 23, 2015 09:45
Show Gist options
  • Save Hajto/8b48ce689eec21c176d0 to your computer and use it in GitHub Desktop.
Save Hajto/8b48ce689eec21c176d0 to your computer and use it in GitHub Desktop.
Scala Scraper
import model.FuneralSchedule
import play.api.libs.json.Json
import scala.io.Source
// Value sent as the `date` query parameter of the schedule page.
val date = "2015-05-05"
// Download the whole page as one string. The Source is closed explicitly so the
// underlying connection is released even if reading fails.
val source = {
  val page = Source.fromURL("http://zck.krakow.pl/?pageId=16&date=" + date)
  try page.mkString finally page.close()
}
// Matches one attribute-less <table> ... </table> span ((?s) lets `.` cross line breaks).
// Group 1 captures the text that starts with "Cmentarz" (Polish for "cemetery")
// and runs up to the next `<`.
val regex = "(?s)<table>.+?(Cmentarz.+?)<.+?</table>".r
// Accumulators filled by the scraping loop; both are built by prepending.
var thing: List[FuneralSchedule] = List()
var jsonFeed: List[Funeral] = List()
// NOTE(review): not referenced by any code visible in this file - confirm before removing.
val regMatcher = "("
/**
 * One entry of the scraped feed, built from three optional text fields.
 *
 * @param hour first field of the entry, if present
 * @param who  second field of the entry, if present
 * @param age  third field of the entry, if present
 */
case class Funeral(hour: Option[String], who: Option[String], age: Option[String]) {
  // Fixed placeholder text that replaces the generated case-class rendering.
  override def toString: String = "Cos"
}
// JSON (de)serializer for Funeral, derived by the play-json macro.
implicit val format = Json.format[Funeral]
// Row and cell patterns are compiled once instead of once per table / per row.
// `\s*` (rather than `\s?`) tolerates the several spaces that can remain inside
// a tag after clearStrings has removed more than one attribute from it.
val rowPattern = """<tr\s*>.+?</\s*tr>""".r
val cellPattern = """<td\s*>.+?</\s*td>""".r
// Walk every matched table, record it in `thing`, and turn each row that has at
// least three <td> cells into a Funeral prepended to `jsonFeed`.
// `out` itself is Unit because foreach returns nothing.
val out = regex.findAllIn(source).matchData foreach { table =>
  // Strip attributes and line breaks once and reuse the result for both uses.
  val cleaned = clearStrings(table.group(0))
  thing ::= FuneralSchedule(table.group(1), cleaned)
  rowPattern.findAllIn(cleaned) foreach { row =>
    // Only the first three cells are needed. Rows with fewer cells are skipped
    // instead of failing with a MatchError.
    cellPattern.findAllIn(row).take(3).toList match {
      case List(a, b, c) =>
        // removeMarkers takes an Option and returns a String, while Funeral
        // expects Options, so wrap on the way in and on the way out.
        jsonFeed ::= Funeral(
          Some(removeMarkers(Some(a))),
          Some(removeMarkers(Some(b))),
          Some(removeMarkers(Some(c)))
        )
      case _ => // fewer than three cells: nothing to record for this row
    }
  }
  println("Koniec tabeli")
}
// Worksheet outputs: the collected schedules and the JSON rendering of the feed.
thing
Json.toJson(jsonFeed)
// Manual smoke check of removeMarkers.
println(removeMarkers(Some("<td > <td> Marian Debil </ td>")))
/**
 * Removes opening and closing `td` tags from the given text.
 *
 * @param s text that may contain `<td>` / `</td>` markers, or None
 * @return the text with every `td` tag removed, or a single space when `s` is None
 */
def removeMarkers(s: Option[String]): String = {
  // The helper is declared before the result expression: a block that ends in a
  // definition evaluates to Unit, which does not conform to the String return type.
  // `\s*` also strips tags that contain more than one space, e.g. `<td  >`.
  def removeMarker(s: String): String =
    s.replaceAll("""</?\s*td\s*>""", "")

  s.fold(" ")(removeMarker)
}
/**
 * Strips `class`, `id` and `style` attributes (double-quoted values only) and
 * all line-break characters from the given markup.
 *
 * Whitespace around a removed attribute is left in place, so a tag such as
 * `<td class="a">` becomes `<td >`.
 *
 * @param s markup to clean
 * @return `s` without the listed attributes and without `\r` / `\n`
 */
def clearStrings(s: String): String = {
  // `[^\"]*` keeps each match inside its own pair of quotes. The previous `.+?`
  // could not match an empty value, so `class=""` consumed text up to the next
  // quote and corrupted the following attribute.
  // `\r` is removed together with `\n` so CRLF input ends up on a single line.
  val attributeOrLineBreak =
    "((class=\"[^\"]*\")|(id=\"[^\"]*\")|(style=\"[^\"]*\")|([\\r\\n]))"
  s.replaceAll(attributeOrLineBreak, "")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment