Skip to content

Instantly share code, notes, and snippets.

@travisbrown
Last active August 29, 2015 13:57
Show Gist options
  • Save travisbrown/9609756 to your computer and use it in GitHub Desktop.
Save travisbrown/9609756 to your computer and use it in GitHub Desktop.
import scalaz._, Scalaz._, concurrent.{ Future, Task }
import argonaut._, Argonaut._
/**
 * Argonaut flavour of the demo: every line is decoded with cursor-based
 * `DecodeJson` instances, and a decoding failure is carried by the returned
 * `Task` as a `RuntimeException` wrapping Argonaut's error message.
 */
object ArgonautDemo extends JsonDemo {
  /** Reads each array named in `entityNames` at the cursor and adds up their sizes. */
  val entitySum = DecodeJson { cursor =>
    entityNames
      .traverseU(name => cursor.get[JsonArray](name).map(array => array.size))
      .map(sizes => sizes.sum)
  }

  /**
   * Decodes one line: a `Result.deletion` when a "delete" field is present,
   * otherwise a `Result.tweet` built from the entity count and the user's
   * profile background colour.
   */
  val resultReads = DecodeJson { cursor =>
    val deletion = cursor.get[Json]("delete").map(_ => Result.deletion)

    // A `def` keeps the tweet branch unevaluated unless the deletion branch fails.
    def tweet = {
      val entityCount = (cursor --\ "entities").as(entitySum)
      val color = (cursor --\ "user" --\ "profile_background_color").as[String]
      (entityCount |@| color)(Result.tweet _)
    }

    deletion ||| tweet
  }

  /** Parses and decodes `line` in a forked `Future`, wrapped in a `Task`. */
  def parseLine(line: String): Task[Result] = {
    val decoding = Future.delay(
      Parse.decodeEither(line)(resultReads).leftMap(message => new RuntimeException(message))
    )
    new Task(Future.fork(decoding))
  }
}
// Scala version the demos are compiled against.
scalaVersion := "2.10.3"

// Extra repositories for the dependencies below. They are addressed over
// HTTPS so that artifacts are not downloaded over an unencrypted connection
// (newer sbt releases refuse plain-HTTP resolvers by default).
resolvers ++= Seq(
  "Scalaz Bintray Repo" at "https://dl.bintray.com/scalaz/releases",
  "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/",
  "Typesafe repository" at "https://repo.typesafe.com/typesafe/releases/"
)

// The three JSON libraries being compared, plus the streaming/concurrency
// libraries used to drive them. Note that shapeless-scalaz is a -SNAPSHOT
// version, so the resolved artifact may change between builds.
libraryDependencies ++= Seq(
  "com.typesafe.play" %% "play-json" % "2.2.2-RC1",
  "io.argonaut" %% "argonaut" % "6.0.3",
  "org.json4s" %% "json4s-jackson" % "3.2.6",
  "org.scalaz.stream" %% "scalaz-stream" % "0.3.1",
  "org.scalaz" %% "scalaz-concurrent" % "7.0.6",
  "org.typelevel" %% "shapeless-scalaz" % "0.2-SNAPSHOT"
)
import scalaz.concurrent.Task
import org.json4s._
import org.json4s.jackson.JsonMethods.parse
/**
 * Json4s flavour of the demo.
 *
 * Quick and dirty: who cares about nice error messages or being careful about
 * validation, let's just throw exceptions. A line that is not a JSON object,
 * or a tweet whose profile background colour is not a string, fails the
 * pattern match inside the `Task` body with a `MatchError`.
 */
object Json4sDemo extends JsonDemo {
  /**
   * Parses `line` and summarises it as a deletion or a tweet.
   *
   * @param line one JSON document
   * @return a `Task` that yields the `Result` for the line when run
   */
  def parseLine(line: String) = Task {
    parse(line) match {
      case obj: JObject =>
        // Only the top-level field names matter here, so inspect them directly
        // instead of calling `obj.values`, which converts the entire document
        // tree into plain Scala values on every line.
        val isDeletion = obj.obj.exists { case (name, _) => name == "delete" }

        if (isDeletion) Result.deletion
        else {
          val entities = obj \ "entities"
          // Entries that are missing or are not arrays are skipped by `collect`.
          val entityCount = entityNames.map(entities \ _).collect {
            case JArray(values) => values.size
          }.sum
          val color = obj \ "user" \ "profile_background_color" match {
            case JString(value) => value
          }
          Result.tweet(entityCount, color)
        }
    }
  }
}
import scalaz._, Scalaz._, concurrent.Task, stream.io.linesR
import shapeless.contrib.scalaz._
/**
 * Shared scaffolding for the JSON demos: implementations supply `parseLine`,
 * and this trait reads the sample file line by line and combines the per-line
 * results into a single `Result`.
 */
trait JsonDemo {
  /** Names of the entity arrays whose sizes are counted for each tweet. */
  val entityNames = List("hashtags", "symbols", "urls", "user_mentions")

  /** Running totals: number of deletions, number of entities, and a count per colour. */
  case class Result(deletions: Int, entities: Int, colors: Map[String, Int])

  object Result {
    /** The contribution of a single deletion notice. */
    val deletion = Result(deletions = 1, entities = 0, colors = Map.empty)

    /**
     * The contribution of a single tweet. The colour "C0DEED" is left out of
     * the colour counts; any other colour is counted once.
     */
    def tweet(entities: Int, color: String) = {
      val colors =
        if (color == "C0DEED") Map.empty[String, Int]
        else Map(color -> 1)
      Result(0, entities, colors)
    }
  }

  /** Turns one line of input into the `Result` it contributes. */
  def parseLine(line: String): Task[Result]

  /** The lines of the sample file. */
  def lines = linesR("../sample-all-2014-03-16.json")

  /** Parses the lines via `gatherMap(16)` and folds the per-line results together. */
  def result = lines.gatherMap(16)(line => parseLine(line)).runFoldMap(parsed => parsed)
}
import scalaz.concurrent.Task
import play.api.libs.json._, play.api.libs.functional.syntax._
/**
 * Play JSON flavour of the demo, assembled from `Reads` combinators. A line
 * that does not match `resultReads` makes `as` throw inside the `Task` body.
 */
object PlayJsonDemo extends JsonDemo {
  /** Picks the JSON array at the current path and yields its number of elements. */
  val arraySize = __.json.pick[JsArray].map(array => array.value.size)

  /** Adds up the sizes of the four entity arrays. */
  val entitySum = {
    val hashtags = (__ \ "hashtags").read(arraySize)
    val symbols = (__ \ "symbols").read(arraySize)
    val urls = (__ \ "urls").read(arraySize)
    val mentions = (__ \ "user_mentions").read(arraySize)

    (hashtags and symbols and urls and mentions) { (h, s, u, m) => h + s + u + m }
  }

  /**
   * Reads a `Result.deletion` when a "delete" field can be picked, and
   * otherwise a `Result.tweet` from the entity count and the user's profile
   * background colour.
   */
  val resultReads = {
    val deletion = (__ \ "delete").json.pick.map(_ => Result.deletion)
    val tweet = (
      (__ \ "entities").read(entitySum) and
      (__ \ "user" \ "profile_background_color").read[String]
    )(Result.tweet _)

    deletion or tweet
  }

  /** Parses `line` and converts it with `resultReads` inside a `Task`. */
  def parseLine(line: String): Task[Result] = Task {
    val json = Json.parse(line)
    json.as(resultReads)
  }
}
@travisbrown
Copy link
Author

Written in response to a challenge by Christopher Brown. On just over a million tweets on my dual-core laptop, the Play implementation takes about 39 seconds, the Json4s one about 38, and the Argonaut one about 51.

@mandubian
Copy link

It's cool to see that with functional composers from Play-Json, we don't lose much time compared to more "imperative" code like json4s (which you could also write for play-json)...
I wonder how it behaves with JsZipper ;)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment