Last active
December 20, 2015 09:09
-
-
Save travisbrown/6105462 to your computer and use it in GitHub Desktop.
Simple example of how to use Dispatch to access the Chronicling America API asynchronously.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scala compiler version the example is built with.
scalaVersion := "2.10.2"

// Dispatch modules share one organization and version: the core HTTP client
// plus the json4s (Jackson) response handlers.
libraryDependencies ++= Seq("dispatch-core", "dispatch-json4s-jackson").map { module =>
  "net.databinder.dispatch" %% module % "0.11.0"
}

// opencsv is a plain Java library, so no Scala-version suffix (single %).
libraryDependencies += "net.sf.opencsv" % "opencsv" % "2.0"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Asynchronous client for the Chronicling America page-search endpoint, built on Dispatch.
  *
  * Every method returns immediately; results arrive through `Future`s running on the
  * execution context brought in by `dispatch.Defaults._`.
  */
object Searcher {
  import dispatch.{ Http, url }, dispatch.Defaults._, dispatch.as
  import org.json4s._
  import scala.concurrent.Future
  import scala.util._

  /** json4s formats picked up implicitly by every `extract` call below. */
  implicit val formats: DefaultFormats.type = DefaultFormats

  /** Base request for the page-search endpoint; query parameters are added per call. */
  val pageReq: dispatch.Req = url("http://chroniclingamerica.loc.gov/search/pages/results/")

  /** Builds the search request for one page of results.
    *
    * @param conj      terms sent space-separated as the `andtext` parameter
    * @param disj      terms sent space-separated as the `ortext` parameter
    * @param yearRange (first, last) years sent as `date1` / `date2` with a `yearRange` filter
    * @param page      result page to request; when `None` no `page` parameter is sent
    * @return the request with all query parameters attached
    */
  def constructRequest(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int),
    page: Option[Int] = None
  ): dispatch.Req = {
    val (firstYear, lastYear) = yearRange
    val fixedParams = Map(
      "format" -> "json",
      "date1" -> firstYear.toString,
      "date2" -> lastYear.toString,
      "dateFilterType" -> "yearRange",
      "andtext" -> conj.mkString(" "),
      "ortext" -> disj.mkString(" ")
    )
    // The Option contributes zero or one ("page", n) pair to the parameter map.
    pageReq <<? (fixedParams ++ page.map("page" -> _.toString))
  }

  /** Issues the request for a single result page and parses the body as JSON.
    *
    * The `OK` handler fails the future for non-successful HTTP responses.
    *
    * @return a future holding the parsed JSON of the requested page
    */
  def retrievePage(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int),
    page: Option[Int] = None
  ): Future[JValue] =
    Http(constructRequest(conj, disj, yearRange, page) OK as.json4s.Json)

  /** Downloads every page of results for a query and concatenates their items.
    *
    * The first response is fetched on its own because it carries the totals that
    * determine how many further pages exist; pages 2 to `pageCount` are then
    * requested through `Future.traverse`, which keeps them in page order.
    *
    * @return a future holding the items of all pages, first page first; it fails
    *         if any single page request or extraction fails
    */
  def search(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int)
  ): Future[List[Item]] =
    retrievePage(conj, disj, yearRange).flatMap { json =>
      val firstPage = json.extract[ResultSet]
      Future.traverse(2 to firstPage.pageCount) { pageNumber =>
        retrievePage(conj, disj, yearRange, Some(pageNumber)).map(
          _.extract[ResultSet].items
        )
      }.map(laterPages => firstPage.items ++ laterPages.flatten)
    }

  /** Asynchronously begins the downloads and, once they complete, prints the outcome
    * and writes the items to a CSV file.
    *
    * A failure while writing the CSV is caught and reported with the same message as
    * a failed download, rather than escaping the completion callback.
    *
    * @param path location of the CSV file to write
    */
  def saveSearchResults(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int)
  )(path: String): Unit =
    search(conj, disj, yearRange).onComplete {
      case Success(items) =>
        println("Successfully downloaded %d items!".format(items.size))
        Try(CsvOutput.writeItems(items)(path)) match {
          case Failure(e) => println("There was a problem: %s".format(e))
          case Success(_) => ()
        }
      case Failure(e) =>
        println("There was a problem: %s".format(e))
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Represents a single newspaper page.
  *
  * Field names (including `ocr_eng`) are kept exactly as they are so that json4s
  * extraction can map the JSON keys of a search result onto them.
  */
case class Item(
  id: String,
  url: String,
  lccn: String,
  date: String,
  sequence: Int,
  title: String,
  ocr_eng: String
) {
  /** The publication identifier: the path segment following `/lccn/` in `id`.
    *
    * Falls back to the unmodified `id` when it does not have that shape, so a single
    * unexpected record cannot abort processing with a `MatchError`.
    */
  def pubId: String = id match {
    case Item.PubIdPattern(publication) => publication
    case _ => id
  }

  /** The date rewritten from `yyyyMMdd` to `yyyy-MM-dd`.
    *
    * Falls back to the unmodified `date` when it is not exactly eight digits.
    */
  def formattedDate: String = date match {
    case Item.DatePattern(year, month, day) => "%s-%s-%s".format(year, month, day)
    case _ => date
  }
}

/** Some helpers for working with item data. */
object Item {
  /** Eight consecutive digits, captured as year, month and day. */
  val DatePattern = """(\d\d\d\d)(\d\d)(\d\d)""".r

  /** An id of the form `/lccn/<publication>/...`, capturing `<publication>`. */
  val PubIdPattern = """/lccn/([^/]+)/.*""".r
}
/** Represents a set of search query results: one page of items plus the paging totals. */
case class ResultSet(totalItems: Int, itemsPerPage: Int, items: List[Item]) {
  /** Number of pages spanned by `totalItems` at `itemsPerPage` items per page.
    *
    * Integer division counts the full pages; `signum` of the remainder adds one more
    * for a partially filled last page. Throws `ArithmeticException` when
    * `itemsPerPage` is zero.
    */
  def pageCount = {
    val fullPages = totalItems / itemsPerPage
    val leftover = totalItems % itemsPerPage
    fullPages + math.signum(leftover)
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** The boring stuff: writing the CSV file. */
object CsvOutput {
  import au.com.bytecode.opencsv.CSVWriter

  /** Writes one CSV row per item to the file at `path`.
    *
    * Columns, in order: publication id, title, formatted date, sequence number, URL,
    * and the OCR text with newlines replaced by spaces so each record stays on one line.
    *
    * The writer is closed in a `finally` block, so the file handle is released even
    * when building or writing a row throws.
    *
    * @param items the pages to write, in output order
    * @param path  location of the CSV file to create
    */
  def writeItems(items: List[Item])(path: String): Unit = {
    val writer = new CSVWriter(new java.io.FileWriter(path))
    try {
      items.foreach { item =>
        val row = Array(
          item.pubId,
          item.title,
          item.formattedDate,
          item.sequence.toString,
          item.url,
          item.ocr_eng.replaceAll("\n", " ")
        )
        writer.writeNext(row)
      }
    } finally {
      writer.close()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, that's exactly what I needed.
However, I'm very new to Scala; could you explain in a few lines the right way to assemble the pieces?
(I already installed sbt)