Skip to content

Instantly share code, notes, and snippets.

@itayw
Last active January 17, 2016 19:31
Show Gist options
  • Save itayw/3c5e76998f334ffc6757 to your computer and use it in GitHub Desktop.
Save itayw/3c5e76998f334ffc6757 to your computer and use it in GitHub Desktop.
import org.elasticsearch.spark._
// Settings handed to the elasticsearch-hadoop Spark SQL data source.
// The fields named under "es.field.read.as.array.include" are declared as arrays
// so the connector maps them to array columns.
// NOTE(review): 172.17.0.1 is presumably the Docker bridge address of the ES host — confirm.
val options = Map(
  "pushdown" -> "true",
  "es.nodes" -> "172.17.0.1",
  "es.port" -> "9200",
  "es.field.read.as.array.include" -> "posTimes,pos.coordinates,origDetections",
  "es.index.auto.create" -> "false"
)

// Load the "tracks/track" resource as a DataFrame.
val df = sqlContext.read.format("org.elasticsearch.spark.sql").options(options).load("tracks/track")

// Print how many documents exist for each value of "kind".
df.groupBy("kind").count().show()

// Scratch statements kept for interactive debugging; uncomment as needed.
//df.show()
//df.groupBy("review.user.screen_name").count().show()
//df.printSchema()
//sc.esRDD("reviews/scored_review")

// Register the DataFrame under the name "esTracks" so it can be queried with SQL.
df.registerTempTable("esTracks")

//val userCount = df.count()
//print(userCount)
%dep
// Loads the elasticsearch-hadoop connector jar from the local path below so the
// Spark paragraphs can import org.elasticsearch.spark._.
// NOTE(review): Zeppelin's %dep interpreter is documented to work only before the
// Spark interpreter has been initialized — confirm this paragraph is run first
// (restart the interpreter if Spark was already used).
z.load("/jars/elasticsearch-hadoop-2.1.2.jar")
import org.elasticsearch.spark._
// Settings handed to the elasticsearch-hadoop Spark SQL data source.
// "es.field.read.as.array.include" is deliberately left empty here: no field of
// the review documents is forced to be read as an array.
// NOTE(review): 172.17.0.1 is presumably the Docker bridge address of the ES host — confirm.
val options = Map(
  "pushdown" -> "true",
  "es.nodes" -> "172.17.0.1",
  "es.port" -> "9200",
  "es.field.read.as.array.include" -> "",
  "es.index.auto.create" -> "false"
)

// Load the "reviews/review" resource as a DataFrame.
val df = sqlContext.read.format("org.elasticsearch.spark.sql").options(options).load("reviews/review")

// Scratch statements kept for interactive debugging; uncomment as needed.
//df.groupBy("kind").count().show()
//df.show()
//df.groupBy("review.user.screen_name").count().show()
//df.printSchema()
//sc.esRDD("reviews/scored_review")

// Register the DataFrame under the name "esReview" so it can be queried with SQL.
df.registerTempTable("esReview")

//val userCount = df.count()
//print(userCount)
import org.elasticsearch.spark._
import org.apache.spark.sql.Row
// Settings handed to the elasticsearch-hadoop Spark SQL data source.
// The fields named under "es.field.read.as.array.include" are declared as arrays
// so the connector maps them to array columns.
val options = Map("pushdown" -> "true", "es.nodes" -> "172.17.0.1", "es.port" -> "9200", "es.field.read.as.array.include" -> "posTimes,pos.coordinates,origDetections", "es.index.auto.create" -> "false")

// Load the "tracks/track" resource as a DataFrame.
val df = sqlContext.read.format("org.elasticsearch.spark.sql").options(options).load("tracks/track")

// Print the per-kind document counts in Spark's plain-text table form.
df.groupBy("kind").count().show()

// Build one "kind<TAB>count" line per group for Zeppelin's %table display.
// Rows are read by ordinal instead of with `case Row(kind: String, count: Long)`:
// a type pattern never matches null, so the group of documents that have no
// "kind" value made the original partial function throw scala.MatchError.
// A null key is rendered as "null", the same text show() prints for it.
val result = df.groupBy("kind").count().map { row =>
  val kind = if (row.isNullAt(0)) "null" else row.get(0).toString
  kind + "\t" + row.getLong(1)
}.collect()

// Zeppelin renders output that starts with "%table" as a table: the first line
// is the header, columns are tab-separated, rows are newline-separated.
print("%table Kind\tcount\n" + result.mkString("\n"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment