Skip to content

Instantly share code, notes, and snippets.

@fairjm
Created December 5, 2014 17:27
Show Gist options
  • Save fairjm/089ba238d355360c91a8 to your computer and use it in GitHub Desktop.
Save fairjm/089ba238d355360c91a8 to your computer and use it in GitHub Desktop.
record some scala code
// a simple script
// batch download from http://www.andrew.cmu.edu/course/15-749/READINGS/optional/
//extract file name
/** Lists the PDF links found on the page at `url`.
  *
  * The page is fetched as text and every anchor written exactly as
  * `<a href="....pdf">` contributes its `href` value to the result, in page order.
  *
  * @param url address of the page to scan
  * @return the captured `href` values as they appear in the page (not resolved against `url`)
  */
def getAllFiles(url:String):List[String] = {
  import scala.io._
  // [^"] keeps the capture inside a single href attribute; a bare `.` could run past the
  // closing quote of a non-PDF link and swallow markup up to a later `.pdf">` on the same line.
  val regex = """<a href="([^"]+?\.pdf)">""".r
  val source = Source.fromURL(url)
  // Source.fromURL opens a stream; release it even when reading fails.
  val content = try source.mkString finally source.close()
  regex.findAllMatchIn(content).map(_.group(1)).toList
}
// do download
/** Downloads every file named in `list` using NIO channels.
  *
  * For each name the remote address is formed by appending the name to `url`, and the
  * bytes are written to a local file with that same name (a relative name therefore
  * lands in the current working directory).
  *
  * @param list file names to fetch, as they should be appended to `url`
  * @param url  base address the names are appended to
  */
def download(list:List[String],url:String): Unit = {
  import java.nio.channels._
  import java.nio._
  import java.net._
  list foreach { name =>
    val website = new URL(s"$url$name")
    val rbc = Channels.newChannel(website.openStream())
    try {
      //current directory
      val fos = new java.io.FileOutputStream(s"$name")
      try {
        // Long.MaxValue as the count means "copy until the source channel is exhausted".
        fos.getChannel().transferFrom(rbc,0,Long.MaxValue)
      } finally fos.close() // closing the stream also closes its channel
    } finally rbc.close() // release the network stream even if the copy failed
  }
}
// or, alternatively, use download2 (reads via scala.io.Source, writes via java.nio.file.Files)
/** Downloads every file named in `list` using `scala.io.Source` and `java.nio.file.Files`.
  *
  * For each name the remote address is formed by appending the name to `url`; the whole
  * body is read into memory and then written to a local file with that same name.
  *
  * @param list file names to fetch, as they should be appended to `url`
  * @param url  base address the names are appended to
  */
def download2(list:List[String],url:String): Unit = {
  import java.nio.file._
  import scala.io._
  list foreach { name =>
    //use ISO-8859-1 codec to download binary
    // (each byte decodes to the char with the same numeric value, so toByte restores it)
    val content = Source.fromURL(s"$url$name","ISO-8859-1")
    // Read everything first and close the source even if reading fails.
    val bytes = try content.map(_.toByte).toArray finally content.close()
    Files.write(Paths.get(s"$name"),bytes)
  }
}
//run
// The index page is also the base address the listed file names are appended to.
val baseUrl = "http://www.andrew.cmu.edu/course/15-749/READINGS/optional/"
// `downloadu` is not defined anywhere in this script; `download` is the intended entry point.
download(getAllFiles(baseUrl), baseUrl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment