josep2 · March 9, 2017 23:47
diff --git a/succinct_dataframe_example.scala b/succinct_dataframe_example.scala
 // Example adapted from https://github.com/amplab/succinct
 //
 // Prerequisites not defined in this snippet: `sc` (the SparkContext) and
 // `sqlContext` must already exist, and Row / StructType / StructField / the
 // *Type objects must be in scope.
 // NOTE(review): those names look like Spark SQL types; confirm whether the
 // import below supplies them or whether Spark SQL imports are also needed.
 import edu.berkeley.cs.succinct.sql._

 // Schema of the city table; arguments are (column name, data type, nullable).
 val citySchema = StructType(Seq(
   StructField("Name", StringType, nullable = false),
   StructField("Length", IntegerType, nullable = true),
   StructField("Area", DoubleType, nullable = false),
   StructField("Airport", BooleanType, nullable = true)))

 // Create an RDD of Rows with some data; sc is the SparkContext.
 // Each Row lists its values in the same order as the schema fields.
 val cityRDD = sc.parallelize(Seq(
   Row("San Francisco", 12, 44.52, true),
   Row("Palo Alto", 12, 22.33, false),
   Row("Munich", 8, 3.14, true)))

 // Create a data frame from the RDD and the schema
 val cityDataFrame = sqlContext.createDataFrame(cityRDD, citySchema)

 // Save the DataFrame in the "Succinct" format.
 // This is the only write in the example: no save mode is set, so with Spark's
 // default (error-if-exists) a second save to this path would be rejected.
 cityDataFrame.write.format("edu.berkeley.cs.succinct.sql").save("/path/to/data")

 // Read the Succinct DataFrame from the saved path
 val succinctCities = sqlContext.succinctTable("/path/to/data")

 // Filter and prune: names of the cities whose Area is at least 22.0
 val bigCities = succinctCities.filter("Area >= 22.0").select("Name").collect()

 // Alternately, load the data saved above through the DataFrameReader API.
 // The data is not written again here: the path already holds it (see above).
 val succinctCities2 = sqlContext.read.format("edu.berkeley.cs.succinct.sql").load("/path/to/data")
 val smallCities = succinctCities2.filter("Area <= 10.0").select("Name").collect()
	// Example adapted from https://github.com/amplab/succinct
	//
	// Prerequisites not defined in this snippet: `sc` (the SparkContext) and
	// `sqlContext` must already exist, and Row / StructType / StructField / the
	// *Type objects must be in scope.
	// NOTE(review): those names look like Spark SQL types; confirm whether the
	// import below supplies them or whether Spark SQL imports are also needed.
	import edu.berkeley.cs.succinct.sql._

	// Schema of the city table; arguments are (column name, data type, nullable).
	val citySchema = StructType(Seq(
		StructField("Name", StringType, nullable = false),
		StructField("Length", IntegerType, nullable = true),
		StructField("Area", DoubleType, nullable = false),
		StructField("Airport", BooleanType, nullable = true)))

	// Create an RDD of Rows with some data; sc is the SparkContext.
	// Each Row lists its values in the same order as the schema fields.
	val cityRDD = sc.parallelize(Seq(
		Row("San Francisco", 12, 44.52, true),
		Row("Palo Alto", 12, 22.33, false),
		Row("Munich", 8, 3.14, true)))

	// Create a data frame from the RDD and the schema
	val cityDataFrame = sqlContext.createDataFrame(cityRDD, citySchema)

	// Save the DataFrame in the "Succinct" format.
	// This is the only write in the example: no save mode is set, so with Spark's
	// default (error-if-exists) a second save to this path would be rejected.
	cityDataFrame.write.format("edu.berkeley.cs.succinct.sql").save("/path/to/data")

	// Read the Succinct DataFrame from the saved path
	val succinctCities = sqlContext.succinctTable("/path/to/data")

	// Filter and prune: names of the cities whose Area is at least 22.0
	val bigCities = succinctCities.filter("Area >= 22.0").select("Name").collect()

	// Alternately, load the data saved above through the DataFrameReader API.
	// The data is not written again here: the path already holds it (see above).
	val succinctCities2 = sqlContext.read.format("edu.berkeley.cs.succinct.sql").load("/path/to/data")
	val smallCities = succinctCities2.filter("Area <= 10.0").select("Name").collect()