Created December 8, 2016 13:35
SparkSQL with Cassandra from Killrweather processed data
// If you want to run a local cluster:
// start-master.sh
// start-slave.sh <your-master-url>

// Start spark-shell locally and load the Cassandra connector package, OR
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10

// start spark-shell against the Spark cluster and load the Cassandra connector package
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --master <your-master-url> --packages datastax:spark-cassandra-connector:1.6.0-s_2.10
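// If Cassandra is not running on localhost, point the connector at a reachable
// node with spark.cassandra.connection.host (a sketch; the host 10.0.0.5 below
// is an assumption, substitute one of your own Cassandra contact points):
~/Development/spark-1.6.3-bin-hadoop2.6/bin/spark-shell --packages datastax:spark-cassandra-connector:1.6.0-s_2.10 --conf spark.cassandra.connection.host=10.0.0.5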
// Tell the Spark catalog about Cassandra.
// More info: internally the Spark session / Spark SQL has a catalog, similar to
// the Hive metastore, with entries for databases and tables
sqlContext.sql(
  """CREATE TEMPORARY TABLE precipitation
    |USING org.apache.spark.sql.cassandra
    |OPTIONS (
    |  keyspace "isd_weather_data",
    |  table "daily_aggregate_precip",
    |  cluster "<your-cluster>",
    |  pushdown "true"
    |)""".stripMargin)
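// Alternatively (a sketch, equivalent to the SQL registration above): load the
// same table straight through the DataFrame reader API of the connector,
// without registering it in the catalog first
val precipDf = sqlContext.read
  .format("org.apache.spark.sql.cassandra")
  .options(Map("keyspace" -> "isd_weather_data", "table" -> "daily_aggregate_precip"))
  .load()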
// In DSE this is generated automatically inside the internal CassandraHiveMetastore,
// so when you run with DSE the table is located automatically.

// prepare a DataFrame from SQL
val df = sqlContext.sql("select * from precipitation")
// run commands and note tab completion
df.show
df.orderBy("day").show
// type df. and hit <TAB> to see the available methods
// etc.
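// A couple of example queries to try next (the column names wsid, year, and
// precipitation are assumptions based on the KillrWeather schema; adjust them
// to match your table):
sqlContext.sql("select wsid, sum(precipitation) as total from precipitation group by wsid").show
df.filter(df("year") === 2014).orderBy("day").show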