# build the spark-sql-perf benchmark harness
git clone https://github.com/databricks/spark-sql-perf.git
cd spark-sql-perf
sbt assembly

# build the TPC-DS data generator (dsdgen) from davies/tpcds-kit
git clone https://github.com/davies/tpcds-kit
sudo yum groupinstall "Development Tools"
cd tpcds-kit/tools
cp Makefile.suite Makefile
make
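# The Scala code below expects the dsdgen binaries under /tmp/tools on every node
# that runs executors. A minimal sketch, assuming a single-node setup (adjust the
# copy step for a cluster):
cp -r . /tmp/tools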
# launch spark-shell; the spark-sql-perf assembly jar built above must be on the
# classpath (e.g. pass it with --jars)
spark-shell

spark.conf.set("spark.sql.shuffle.partitions", "100")
// generate 10 GB of TPC-DS data (scaleFactor = 10) using the dsdgen binaries in /tmp/tools;
// the dsdgen directory must exist on every worker node
import com.databricks.spark.sql.perf.tpcds.Tables
val tables = new Tables(spark.sqlContext, "/tmp/tools", 10)
tables.genData(
  location = "hdfs:///tmp/tpc",
  format = "parquet",
  overwrite = true,
  partitionTables = true,
  useDoubleForDecimal = false,
  clusterByPartitionColumns = true,
  filterOutNullPartitionValues = true,
  numPartitions = 100)

// register the generated files as external tables in the default database
tables.createExternalTables("hdfs:///tmp/tpc", "parquet", "default", false)
// recover partition metadata for every generated table
// (non-partitioned tables raise AnalysisException and are skipped)
spark.catalog.listTables.collect.foreach { t =>
  try {
    println(t.name)
    spark.sqlContext.sql("alter table " + t.name + " recover partitions").show
  } catch {
    case e: org.apache.spark.sql.AnalysisException => println("error recovering partitions for " + t.name)
  }
}
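// optional sanity check (a suggestion, not in the original gist): the TPC-DS fact
// table store_sales should now be queryable from the default database
spark.sql("select count(*) from store_sales").show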
import com.databricks.spark.sql.perf.tpcds.TPCDS
val tpcds = new TPCDS(sqlContext = spark.sqlContext)
//val experiment = tpcds.runExperiment(tpcds.interactiveQueries)
val experiment = tpcds.run(tpcds.runnable)
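// Collecting results -- a hedged sketch based on the spark-sql-perf README, assuming
// the runExperiment variant commented out above is used: the returned handle runs the
// queries asynchronously, waitForFinish blocks until completion (timeout in seconds),
// and getCurrentResults (an assumed accessor; verify against your checkout) exposes
// per-query timings as a DataFrame.
// experiment.waitForFinish(60 * 60 * 10)
// experiment.getCurrentResults.show(false)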