dacr · February 3, 2026 20:21
diff --git a/elasticsearch-demo-3-fill-with-spark.sc b/elasticsearch-demo-3-fill-with-spark.sc
 // summary : Feed elasticsearch with almost 20 years of chicago crimes (using spark).
 // keywords : scala, elasticsearch, feed, chicago, crimes, bigdata, spark
 // publish : gist
 // authors : David Crosson
 // license : Apache License Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0.txt)
 // id : 385ba213-e769-499c-92ae-3f63cfb72d15
 // created-on : 2019-11-02T21:23:37Z
 // managed-by : https://github.com/dacr/code-examples-manager
 // execution : scala 2.12 ammonite script (http://ammonite.io/) - run as follows 'amm scriptname.sc'

 // spark 2.4.4 supports scala 2.12 at most; scala 2.13 support only arrived with spark 3.2
 import $ivy.`org.apache.spark::spark-sql:2.4.4`
 //import $ivy.`org.elasticsearch::elasticsearch-spark-20:7.3.2` // not yet available for scala 2.12 !!!
 import org.apache.spark.sql._

 /*
 Fill elasticsearch with ~19 years of chicago crimes data :
  `curl -L "https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv?accessType=DOWNLOAD" -o crimes.csv`
 */

 // Spark session running locally, with as many worker threads as logical cores ("local[*]").
 val spark = {
   val sessionBuilder = SparkSession.builder().master("local[*]")
   sessionBuilder.getOrCreate()
 }

 // Session time zone used by spark sql when handling timestamps.
 spark.conf.set("spark.sql.session.timeZone", "America/Chicago")

 // Shortcut to the SparkContext backing the session.
 def sc: org.apache.spark.SparkContext = spark.sparkContext

 // DataFrame built from the local `crimes.csv` export (see the curl command above).
 val crimesCSV = {
   val readerOptions = Map(
     "header" -> "true",                          // first line carries the column names
     "sep" -> ",",
     "inferSchema" -> "true",                     // let spark deduce the column types from the data
     "timestampFormat" -> "MM/d/yyyy hh:mm:ss a"  // month/day/year, 12-hour clock with AM/PM marker
   )
   spark.read.format("csv").options(readerOptions).load("crimes.csv")
 }

 // Print the number of rows first, then the inferred schema.
 println(crimesCSV.count())
 crimesCSV.printSchema()
	// summary : Feed elasticsearch with almost 20 years of chicago crimes (using spark).
	// keywords : scala, elasticsearch, feed, chicago, crimes, bigdata, spark
	// publish : gist
	// authors : David Crosson
	// license : Apache License Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0.txt)
	// id : 385ba213-e769-499c-92ae-3f63cfb72d15
	// created-on : 2019-11-02T21:23:37Z
	// managed-by : https://github.com/dacr/code-examples-manager
	// execution : scala 2.12 ammonite script (http://ammonite.io/) - run as follows 'amm scriptname.sc'

	// spark 2.4.4 supports scala 2.12 at most; scala 2.13 support only arrived with spark 3.2
	import $ivy.`org.apache.spark::spark-sql:2.4.4`
	//import $ivy.`org.elasticsearch::elasticsearch-spark-20:7.3.2` // not yet available for scala 2.12 !!!
	import org.apache.spark.sql._

	/*
	Fill elasticsearch with ~19 years of chicago crimes data :
	`curl -L "https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv?accessType=DOWNLOAD" -o crimes.csv`
	*/

	// Spark session running locally, with as many worker threads as logical cores ("local[*]").
	val spark = {
	  val sessionBuilder = SparkSession.builder().master("local[*]")
	  sessionBuilder.getOrCreate()
	}

	// Session time zone used by spark sql when handling timestamps.
	spark.conf.set("spark.sql.session.timeZone", "America/Chicago")

	// Shortcut to the SparkContext backing the session.
	def sc: org.apache.spark.SparkContext = spark.sparkContext

	// DataFrame built from the local `crimes.csv` export (see the curl command above).
	val crimesCSV = {
	  val readerOptions = Map(
	    "header" -> "true",                          // first line carries the column names
	    "sep" -> ",",
	    "inferSchema" -> "true",                     // let spark deduce the column types from the data
	    "timestampFormat" -> "MM/d/yyyy hh:mm:ss a"  // month/day/year, 12-hour clock with AM/PM marker
	  )
	  spark.read.format("csv").options(readerOptions).load("crimes.csv")
	}

	// Print the number of rows first, then the inferred schema.
	println(crimesCSV.count())
	crimesCSV.printSchema()
No results found