# build the spark-sql-perf benchmark harness
git clone https://github.com/databricks/spark-sql-perf.git
cd spark-sql-perf
sbt assembly

# build the TPC-DS data generator (dsdgen) from davies/tpcds-kit
git clone https://github.com/davies/tpcds-kit
sudo yum groupinstall "Development Tools"
cd tpcds-kit/tools
cp Makefile.suite Makefile
make
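# The Scala code below expects the dsdgen binaries under /tmp/tools on every node
# that runs executors. A minimal sketch, assuming a single-node setup (adjust the
# copy step for a cluster):
cp -r . /tmp/tools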
# launch spark-shell; the spark-sql-perf assembly jar built above must be on the
# classpath (e.g. pass it with --jars)
spark-shell

spark.conf.set("spark.sql.shuffle.partitions", "100")
// generate 10 GB of TPC-DS data (scaleFactor = 10) using the dsdgen binaries in /tmp/tools;
// the dsdgen directory must exist on every worker node
import com.databricks.spark.sql.perf.tpcds.Tables
val tables = new Tables(spark.sqlContext, "/tmp/tools", 10)
tables.genData(
  location = "hdfs:///tmp/tpc",
  format = "parquet",
  overwrite = true,
  partitionTables = true,
  useDoubleForDecimal = false,
  clusterByPartitionColumns = true,
  filterOutNullPartitionValues = true,
  numPartitions = 100)

// register the generated files as external tables in the default database
tables.createExternalTables("hdfs:///tmp/tpc", "parquet", "default", false)
// recover partition metadata for every generated table
// (non-partitioned tables raise AnalysisException and are skipped)
spark.catalog.listTables.collect.foreach { t =>
  try {
    println(t.name)
    spark.sqlContext.sql("alter table " + t.name + " recover partitions").show
  } catch {
    case e: org.apache.spark.sql.AnalysisException => println("error recovering partitions for " + t.name)
  }
}
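// optional sanity check (a suggestion, not in the original gist): the TPC-DS fact
// table store_sales should now be queryable from the default database
spark.sql("select count(*) from store_sales").show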
import com.databricks.spark.sql.perf.tpcds.TPCDS
val tpcds = new TPCDS(sqlContext = spark.sqlContext)
//val experiment = tpcds.runExperiment(tpcds.interactiveQueries)
val experiment = tpcds.run(tpcds.runnable)
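// Collecting results -- a hedged sketch based on the spark-sql-perf README, assuming
// the runExperiment variant commented out above is used: the returned handle runs the
// queries asynchronously, waitForFinish blocks until completion (timeout in seconds),
// and getCurrentResults (an assumed accessor; verify against your checkout) exposes
// per-query timings as a DataFrame.
// experiment.waitForFinish(60 * 60 * 10)
// experiment.getCurrentResults.show(false)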