buildlackey · September 7, 2019 00:19
diff --git a/json.csv.timestamp b/json.csv.timestamp
 // Will work on MacOS and Linux, 
 // but needs slight modification on Windows where noted

 import java.io.{FileOutputStream, PrintWriter}
 import org.apache.spark.sql.types._
 import sys.process._


 // Pin the JVM default time zone so timestamps are parsed and rendered the same
 // way on every machine. TimeZone is fully qualified because java.util.TimeZone
 // is not among the imports above.
 // NOTE(review): "PST" is one of the three-letter zone IDs that the
 // java.util.TimeZone documentation deprecates; kept so behaviour is unchanged.
 System.setProperty("user.timezone", "PST")
 java.util.TimeZone.setDefault(java.util.TimeZone.getTimeZone("PST"))

 // Location of the CSV input written below and of the JSON output.
 val csvfile = "/tmp/data.csv"
 val jsonfile = "/tmp/data.json"

 // Remove whatever a previous run left at those paths.
 s"rm -rf $csvfile".!          // might not work on Windows
 s"rm -rf $jsonfile".!         // unless Cygwin is installed

 /** Writes `str` to the file at `csvfile`, creating it or replacing its content.
   *
   * `java.nio.file.Files.write` closes the file itself and throws an
   * `IOException` when the write fails, whereas a `PrintWriter` only records
   * write errors internally. The text is encoded as UTF-8 instead of the
   * platform default charset.
   *
   * @param str complete text to store in the file
   */
 def writeString(str: String): Unit = {
   val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
   java.nio.file.Files.write(java.nio.file.Paths.get(csvfile), bytes)
   ()
 }

 // Pipe-delimited sample data: a header row followed by three rows whose
 // timestamps differ only in their UTC offset (+0000, +0100, -0100).
 val input = Seq(
   "name|score|date",
   "joe|2|1970-01-01T00:00:00+0000",
   "bob|3|1970-01-01T00:00:00+0100",
   "ray|4|1970-01-01T00:00:00-0100"
 ).mkString("\n")

 // Explicit schema for the three CSV columns, so that `date` is read as a
 // timestamp rather than a plain string.
 val schema = new StructType().
   add("name", StringType).
   add("score", IntegerType).
   add("date", TimestampType)

 // Put the sample rows on disk so they can be read back as a CSV file.
 writeString(input)


 // Read the pipe-delimited file with the explicit schema.
 // NOTE(review): the sample offsets have four digits (e.g. +0100) while a single
 // `X` denotes a two-digit offset -- confirm the one-letter form is intended.
 // Trailing dots keep each chained call pasteable into the REPL.
 val df = spark.read.
   format("csv").
   schema(schema).
   options(Map(
     "header" -> "true",
     "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ssX",
     "delimiter" -> "|"
   )).
   load(csvfile)

 // Show the resulting schema and all rows, without truncating column values.
 df.printSchema()
 df.show(false)

 // Write the same rows out as JSON using the same timestamp pattern.
 df.write.
   format("json").
   option("timestampFormat",  "yyyy-MM-dd'T'HH:mm:ssX").
   save(jsonfile)
	// Will work on MacOS and Linux,
	// but needs slight modification on Windows where noted

	import java.io.{FileOutputStream, PrintWriter}
	import org.apache.spark.sql.types._
	import sys.process._


	// Pin the JVM default time zone so timestamps are parsed and rendered the same
	// way on every machine. TimeZone is fully qualified because java.util.TimeZone
	// is not among the imports above.
	// NOTE(review): "PST" is one of the three-letter zone IDs that the
	// java.util.TimeZone documentation deprecates; kept so behaviour is unchanged.
	System.setProperty("user.timezone", "PST")
	java.util.TimeZone.setDefault(java.util.TimeZone.getTimeZone("PST"))

	// Location of the CSV input written below and of the JSON output.
	val csvfile = "/tmp/data.csv"
	val jsonfile = "/tmp/data.json"

	// Remove whatever a previous run left at those paths.
	s"rm -rf $csvfile".! // might not work on Windows
	s"rm -rf $jsonfile".! // unless Cygwin is installed

	/** Writes `str` to the file at `csvfile`, creating it or replacing its content.
	  *
	  * `java.nio.file.Files.write` closes the file itself and throws an
	  * `IOException` when the write fails, whereas a `PrintWriter` only records
	  * write errors internally. The text is encoded as UTF-8 instead of the
	  * platform default charset.
	  *
	  * @param str complete text to store in the file
	  */
	def writeString(str: String): Unit = {
	  val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
	  java.nio.file.Files.write(java.nio.file.Paths.get(csvfile), bytes)
	  ()
	}

	// Pipe-delimited sample data: a header row followed by three rows whose
	// timestamps differ only in their UTC offset (+0000, +0100, -0100).
	// The pipes must be bare: a backslash before them ends up in the data and
	// prevents stripMargin from recognising the margin character.
	val input =
	  """name|score|date
	    |joe|2|1970-01-01T00:00:00+0000
	    |bob|3|1970-01-01T00:00:00+0100
	    |ray|4|1970-01-01T00:00:00-0100""".stripMargin

	// Explicit schema for the three CSV columns, so that `date` is read as a
	// timestamp rather than a plain string.
	val schema = StructType(Seq(
	  StructField("name", StringType),
	  StructField("score", IntegerType),
	  StructField("date", TimestampType)
	))

	// Put the sample rows on disk so they can be read back as a CSV file.
	writeString(input)


	// Read the pipe-delimited file with the explicit schema. The delimiter is a
	// bare "|": "\|" is not a valid escape sequence in a Scala string literal.
	// NOTE(review): the sample offsets have four digits (e.g. +0100) while a single
	// `X` denotes a two-digit offset -- confirm the one-letter form is intended.
	val df = spark.read.
	  format("csv").
	  schema(schema).
	  option("header", "true").
	  option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ssX").
	  option("delimiter", "|").load(csvfile)

	// Show the resulting schema and all rows, without truncating column values.
	df.printSchema()
	df.show(false)

	// Write the same rows out as JSON using the same timestamp pattern.
	df.write.
	  option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ssX").
	  json(jsonfile)