ryancutter · May 15, 2016 21:18
diff --git a/movies.py b/movies.py
 # Connector settings: the Elasticsearch resource to read and the query to run on it.
 conf = {
     "es.resource": "movies/logs",
     "es.query": "?q=name:bourne",
 }

 # Load the matching documents as an RDD through the elasticsearch-hadoop input format.
 movies = sc.newAPIHadoopRDD(
     "org.elasticsearch.hadoop.mr.EsInputFormat",
     "org.apache.hadoop.io.NullWritable",
     "org.elasticsearch.hadoop.mr.LinkedMapWritable",
     conf=conf,
 )

 # Turn each record into a Row; 'id' and 'name' are read from element 1 of the
 # record (presumably the document's field map -- confirm against the connector).
 moviesRows = movies.map(
     lambda record: Row(id=int(record[1]['id']), name=record[1]['name'])
 )
 # NOTE(review): collect() brings every matching row to the driver before the
 # DataFrame is built -- confirm this is intended for large result sets.
 moviesRowsList = moviesRows.collect()

 # Expose the rows to Spark SQL as the cached temp table "movies".
 schemaMovies = sqlContext.createDataFrame(moviesRowsList)
 schemaMovies.registerTempTable("movies")
 sqlContext.cacheTable("movies")
	# Connector settings: the Elasticsearch resource to read and the query to run on it.
	conf = {
		"es.resource": "movies/logs",
		"es.query": "?q=name:bourne",
	}

	# Load the matching documents as an RDD through the elasticsearch-hadoop input format.
	movies = sc.newAPIHadoopRDD(
		"org.elasticsearch.hadoop.mr.EsInputFormat",
		"org.apache.hadoop.io.NullWritable",
		"org.elasticsearch.hadoop.mr.LinkedMapWritable",
		conf=conf,
	)

	# Turn each record into a Row; 'id' and 'name' are read from element 1 of the
	# record (presumably the document's field map -- confirm against the connector).
	moviesRows = movies.map(
		lambda record: Row(id=int(record[1]['id']), name=record[1]['name'])
	)
	# NOTE(review): collect() brings every matching row to the driver before the
	# DataFrame is built -- confirm this is intended for large result sets.
	moviesRowsList = moviesRows.collect()

	# Expose the rows to Spark SQL as the cached temp table "movies".
	schemaMovies = sqlContext.createDataFrame(moviesRowsList)
	schemaMovies.registerTempTable("movies")
	sqlContext.cacheTable("movies")