Skip to content

Instantly share code, notes, and snippets.

@jspooner
Last active July 18, 2018 17:26
Show Gist options
  • Save jspooner/54cff2a299c4d1a3293f9d4c05c9bb8e to your computer and use it in GitHub Desktop.
Save jspooner/54cff2a299c4d1a3293f9d4c05c9bb8e to your computer and use it in GitHub Desktop.
Databricks + es-hadoop + Amazon Elasticsearch Service
import org.elasticsearch.spark._
import org.elasticsearch.spark.rdd.EsSpark
import org.elasticsearch.spark.sql._
// Create a DataFrame to write to Elasticsearch
/** One row of the sample data set written to Elasticsearch.
  *
  * The field names are kept as-is because they become the DataFrame column
  * names when the rows are converted with `toDF()`.
  *
  * @param name         the character's name
  * @param actor        the actor credited for the character
  * @param episodeDebut title of the episode recorded as the character's debut
  */
case class SimpsonCharacter(
  name: String,
  actor: String,
  episodeDebut: String
)
// Sample data: the rows are handed to the SparkContext (`sc`), converted to a
// DataFrame and then repartitioned down to a single partition.
val simpsonsDF = {
  val characters = List(
    SimpsonCharacter("Homer", "Dan Castellaneta", "Good Night"),
    SimpsonCharacter("Marge", "Julie Kavner", "Good Night"),
    SimpsonCharacter("Bart", "Nancy Cartwright", "Good Night"),
    SimpsonCharacter("Lisa", "Yeardley Smith", "Good Night"),
    SimpsonCharacter("Maggie", "Liz Georges and more", "Good Night"),
    SimpsonCharacter("Sideshow Bob", "Kelsey Grammer", "The Telltale Head")
  )
  sc.parallelize(characters).toDF().repartition(1)
}
// Connection settings passed to es-hadoop for the write below.
// Declared as a `val`: the map is built once and never reassigned.
val esConfig: Map[String, String] = Map(
  // Endpoint of the Amazon Elasticsearch Service domain (placeholder host name).
  "es.nodes" -> "http://myescluster.us-east-1.es.amazonaws.com",
  // Plain-HTTP port, matching the http:// scheme of the endpoint above.
  "es.port" -> "80",
  // Per the es-hadoop configuration reference: do not discover other cluster
  // nodes, and talk only to the declared `es.nodes` endpoint (WAN/cloud mode).
  "es.nodes.discovery" -> "false",
  "es.nodes.wan.only" -> "true"
)
// Write every row of the DataFrame to the "shows/simpsons" resource using the
// connection settings in esConfig. `saveToEs` is the DataFrame enrichment
// brought into scope by `import org.elasticsearch.spark.sql._`.
simpsonsDF.saveToEs("shows/simpsons", esConfig)