Skip to content

Instantly share code, notes, and snippets.

@tototoshi
Created December 30, 2011 10:05
Show Gist options
  • Save tototoshi/1539116 to your computer and use it in GitHub Desktop.
Save tototoshi/1539116 to your computer and use it in GitHub Desktop.
dispatch+Lift+scala-ioでスクレイピング
package com.github.tototoshi.example
import scala.xml.{ NodeSeq, Elem }
import dispatch._
import net.liftweb._
import common._
import util._
import scalax.io._
/** Extractor that pulls the file name (the last "/"-separated segment) out of a URL string. */
object Filename {
  /**
   * Splits `url` on "/" and returns its last segment.
   *
   * `String.split` discards trailing empty segments, so a URL ending in "/"
   * yields the segment before the slash.
   *
   * @param url URL or path to inspect
   * @return `Some(name)` when a non-empty last segment exists; `None` when there
   *         is none (for example for `""` or `"/"`)
   */
  def unapply(url: String): Option[String] =
    // "".split("/") is Array(""), so an empty name has to be rejected explicitly.
    url.split("/").lastOption.filter(_.nonEmpty)
}
/** Scrapes a fixed blog entry and saves every ".jpg" image it references under the `data` directory. */
object Main {
  /**
   * Downloads `url` and writes its bytes to `data/<name>`, where `<name>` is
   * the file name extracted from the URL by the `Filename` extractor.
   *
   * Fails via `sys.error("Oops!")` when no file name can be extracted; in that
   * case nothing is downloaded.
   *
   * @param url URL of the resource to download
   */
  def save(url: String): Unit =
    url match {
      case Filename(file) =>
        // Announce before fetching so the message describes work that is about to start.
        println("downloading: %s ..." format url)
        val target = new java.io.File("data", file)
        // Create the output directory if it is missing; mkdirs() does nothing
        // when the directory already exists.
        Option(target.getParentFile).foreach(_.mkdirs())
        val data = Resource.fromURL(url).byteArray
        Resource.fromFile(target).write(data)
      case _ => sys.error("Oops!")
    }

  /**
   * Tells whether the text of `filename` ends with ".jpg" (case-sensitive).
   *
   * @param filename node(s) whose text content is tested
   * @return `true` when the text ends with ".jpg"
   */
  def jpgFilter(filename: NodeSeq): Boolean = filename.text.endsWith(".jpg")

  /**
   * Fetches the blog entry as an EUC-JP string, parses it as HTML, and saves
   * every `img` `src` attribute value accepted by `jpgFilter`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val h = new Http
    val req = url("http://satlog.blog119.fc2.com/blog-entry-2943.html") >\ "EUC-JP"
    val html: String = h(req.as_str)
    // Fall back to an empty node sequence when the parse result is not available.
    val elem: NodeSeq = Html5.parse(html) openOr NodeSeq.Empty
    (elem \\ "img" \\ "@src")
      .filter(jpgFilter)
      .foreach(img => save(img.text))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment