jmrr · June 23, 2022 20:04
diff --git a/mysql2parquet.scala b/mysql2parquet.scala
 // Copies a MySQL table into a Parquet file using Spark SQL.
 // Assumes `sc` is an existing SparkContext (e.g. the one spark-shell provides) -- it is not defined here.
 // Uses the DataFrameReader / DataFrameWriter API (Spark 1.4+) for both reading and writing;
 // the older `sqlContext.load(...)` and `DataFrame.save(...)` calls were deprecated in Spark 1.4.
 val sqlContext = new org.apache.spark.sql.SQLContext(sc) // optional

 // Read the table over JDBC. Credentials are passed inside the connection URL.
 // NOTE(review): in a MySQL JDBC URL the path segment after the host is the database name,
 // so the `<table>` placeholder below should presumably be filled with the database -- confirm.
 val df = sqlContext.read.format("jdbc").options(Map(
           "url" -> "jdbc:mysql://<ip.address.your.db>/<table>?user=<username>&password=<pwd>",
           "dbtable" -> "<tablename>")).load()

 // Save only the selected columns as Parquet.
 df.select("<col1>","<col2>","<col3>").write.parquet("</path/to/parquet/file.parquet>")

 //Alternatively, to save all the columns:
 // (Run only one of the two writes: both target the same path, and with the default
 // save mode a write to a path that already exists fails.)

 df.write.parquet("</path/to/parquet/file.parquet>")
	// Copies a MySQL table into a Parquet file using Spark SQL.
	// Assumes `sc` is an existing SparkContext (e.g. the one spark-shell provides) -- it is not defined here.
	// Uses the DataFrameReader / DataFrameWriter API (Spark 1.4+) for both reading and writing;
	// the older `sqlContext.load(...)` and `DataFrame.save(...)` calls were deprecated in Spark 1.4.
	val sqlContext = new org.apache.spark.sql.SQLContext(sc) // optional

	// Read the table over JDBC. Credentials are passed inside the connection URL.
	// NOTE(review): in a MySQL JDBC URL the path segment after the host is the database name,
	// so the `<table>` placeholder below should presumably be filled with the database -- confirm.
	val df = sqlContext.read.format("jdbc").options(Map(
	"url" -> "jdbc:mysql://<ip.address.your.db>/<table>?user=<username>&password=<pwd>",
	"dbtable" -> "<tablename>")).load()

	// Save only the selected columns as Parquet.
	df.select("<col1>","<col2>","<col3>").write.parquet("</path/to/parquet/file.parquet>")

	//Alternatively, to save all the columns:
	// (Run only one of the two writes: both target the same path, and with the default
	// save mode a write to a path that already exists fails.)

	df.write.parquet("</path/to/parquet/file.parquet>")