Created
December 30, 2011 10:05
-
-
Save tototoshi/1539116 to your computer and use it in GitHub Desktop.
dispatch+Lift+scala-ioでスクレイピング
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.github.tototoshi.example | |
import scala.xml.{ NodeSeq, Elem } | |
import dispatch._ | |
import net.liftweb._ | |
import common._ | |
import util._ | |
import scalax.io._ | |
/**
 * Extractor that yields the last `/`-separated segment of a URL-like string,
 * intended for use as a local file name.
 *
 * Usage: `url match { case Filename(name) => ... }`
 */
object Filename {

  /**
   * @param url the URL (or any `/`-separated path) to take the file name from
   * @return `Some(segment)` holding the last non-empty segment, or `None` when
   *         there is none (e.g. `""` or `"/"`)
   */
  def unapply(url: String): Option[String] =
    // `split` already drops trailing empty segments, so the only way the last
    // element can be empty is an empty input ("" splits to Array("")).
    // Rejecting it keeps callers from receiving an empty file name.
    url.split("/").lastOption.filter(_.nonEmpty)
}
/**
 * Small scraper: fetches one blog page, collects the `src` attribute of every
 * `img` element, and downloads those ending in ".jpg" into the local `data`
 * directory.
 */
object Main {

  /**
   * Downloads `url` and writes its bytes to `data/<last path segment of url>`.
   *
   * @param url address of the resource to download
   * @throws RuntimeException (via `sys.error`) when no file name can be
   *         extracted from `url`
   */
  def save(url: String): Unit = {
    // Announce the download before it starts so the message reflects progress
    // and is still shown when the fetch itself fails.
    println("downloading: %s ..." format url)
    val data = Resource.fromURL(url).byteArray
    url match {
      case Filename(file) =>
        val outputDir = new java.io.File("data")
        // Make sure the target directory exists; a no-op when already present.
        outputDir.mkdirs()
        Resource.fromFile(new java.io.File(outputDir, file)).write(data)
      case _ => sys.error("Oops!")
    }
  }

  /**
   * @param filename node(s) whose text is the candidate file name / URL
   * @return true when the text ends with ".jpg" (case-sensitive)
   */
  def jpgFilter(filename: NodeSeq): Boolean = filename.text.endsWith(".jpg")

  /**
   * Entry point: scrapes the hard-coded blog entry and saves its JPEG images.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val h = new Http
    // The request is tagged with "EUC-JP" via dispatch's `>\` operator
    // (presumably the charset the page is served in; confirm against the site).
    val req = url("http://satlog.blog119.fc2.com/blog-entry-2943.html") >\ "EUC-JP"
    val html: String = h(req.as_str)
    // Fall back to an empty node sequence when the HTML cannot be parsed.
    val elem: NodeSeq = Html5.parse(html) openOr NodeSeq.Empty
    val jpgSources = (elem \\ "img" \\ "@src").filter(jpgFilter)
    jpgSources.foreach { img => save(img.text) }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment