Skip to content

Instantly share code, notes, and snippets.

@erikerlandson
Last active June 25, 2016 00:15
Show Gist options
  • Save erikerlandson/44107b811d252f75e45ce636b06c5abb to your computer and use it in GitHub Desktop.
Save erikerlandson/44107b811d252f75e45ce636b06c5abb to your computer and use it in GitHub Desktop.
// 'small' was a subsample of meta-full parquet
// Spark REPL step: sort the rows of `small` by its "@timestamp" column, turn each row into a
// compact JSON string with utils.toJSON (defined below), and save the strings as text at the
// given path.
scala> small.orderBy(small("@timestamp")).map(utils.toJSON).saveAsTextFile("/home/eje/logdemo/logs_ordered.json")
/** Helpers for exporting Spark SQL rows as compact JSON strings. */
object utils {
  import org.json4s._
  import org.json4s.jackson.JsonMethods._
  import org.json4s.JsonDSL._
  import org.apache.spark.sql.Row
  // Renamed on import so it does not shadow scala.collection.immutable.Vector.
  import org.apache.spark.mllib.linalg.{SparseVector, Vector => MLVector}

  // Positions of the row cells that are exported. The JSON keys suggest timestamp, host name,
  // level and message, but the schema is not visible here -- TODO confirm against the
  // DataFrame that is passed in.
  private val TsCol = 0
  private val HnCol = 3
  private val LvlCol = 6
  private val MsgCol = 7
  private val FvCol = 19

  /**
   * Interprets a row cell as a sparse MLlib vector.
   *
   * A `SparseVector` is returned untouched, so its stored indices and values are emitted
   * exactly as they are. Any other MLlib vector is converted with `toSparse`. Everything else
   * (including `null`) is rejected with a message naming the offending column, instead of the
   * bare `ClassCastException` / `NullPointerException` an unchecked cast would produce.
   *
   * @param cell the raw cell value taken from the row
   * @param col  the column position, used only for the error message
   * @throws IllegalArgumentException if `cell` is not an MLlib vector
   */
  private def asSparse(cell: Any, col: Int): SparseVector = cell match {
    case sparse: SparseVector => sparse
    case other: MLVector      => other.toSparse
    case unsupported =>
      val found = Option(unsupported).map(_.getClass.getName).getOrElse("null")
      throw new IllegalArgumentException(
        s"column $col: expected an org.apache.spark.mllib.linalg.Vector but found $found")
  }

  /**
   * Renders one row as a single-line JSON object of the form
   * `{"ts":..,"hn":..,"lvl":..,"msg":..,"fv":{"len":..,"idx":[..],"val":[..]}}`.
   *
   * Cells 0, 3, 6 and 7 are written as JSON strings through string interpolation, so a null
   * cell is emitted as the text "null". Cell 19 must hold an MLlib vector; its size, active
   * indices and active values are written under "fv".
   */
  val toJSON: Row => String = (row: Row) => {
    val fv = asSparse(row.get(FvCol), FvCol)
    val jv =
      ("ts" -> s"${row.get(TsCol)}") ~
      ("hn" -> s"${row.get(HnCol)}") ~
      ("lvl" -> s"${row.get(LvlCol)}") ~
      ("msg" -> s"${row.get(MsgCol)}") ~
      ("fv" -> (
        ("len" -> fv.size) ~
        ("idx" -> fv.indices.toVector) ~
        ("val" -> fv.values.toVector)
      ))
    compact(render(jv))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment