Skip to content

Instantly share code, notes, and snippets.

@buildlackey
Last active September 7, 2019 00:19
Show Gist options
  • Save buildlackey/93d4f14fd321b39c1371f8baff05f570 to your computer and use it in GitHub Desktop.
Save buildlackey/93d4f14fd321b39c1371f8baff05f570 to your computer and use it in GitHub Desktop.
json.csv.timestamp
// Will work on MacOS and Linux,
// but needs slight modification on Windows where noted
import java.io.{FileOutputStream, PrintWriter}
import org.apache.spark.sql.types._
import sys.process._
// Make PST the JVM default time zone for this session.
// TimeZone is imported here because the import list at the top of the
// script does not bring java.util.TimeZone into scope.
import java.util.TimeZone
System.setProperty("user.timezone", "PST")
TimeZone.setDefault(TimeZone.getTimeZone("PST"))

// Locations of the CSV input and the JSON output used by this script.
val csvfile = "/tmp/data.csv"
val jsonfile = "/tmp/data.json"

// Remove whatever an earlier run left at those locations. `rm` is a Unix
// tool, so on Windows these two lines might not work unless Cygwin is
// installed.
Seq("rm", "-rf", csvfile).!
Seq("rm", "-rf", jsonfile).!
// Writes `str` to the file named by `csvfile` (opened with FileOutputStream,
// so existing content is replaced) and returns the PrintWriter that was used,
// already closed.
def writeString(str: String) = {
  val out = new PrintWriter(new FileOutputStream(csvfile))
  out.write(str)
  out.close()
  out
}
// Sample data: a header row plus three rows, each carrying the timestamp
// 1970-01-01T00:00:00 with a different UTC offset (+0000, +0100, -0100).
// NOTE: the leading `|` on the continuation lines is the stripMargin marker
// and is removed; every other `|` is the field delimiter of the CSV data.
val input =
"""name|score|date
|joe|2|1970-01-01T00:00:00+0000
|bob|3|1970-01-01T00:00:00+0100
|ray|4|1970-01-01T00:00:00-0100""".stripMargin

// Explicit schema for the three columns, in file order.
val schema = StructType(
  List(
    StructField("name", StringType),
    StructField("score", IntegerType),
    StructField("date", TimestampType)
  )
)

// One pattern shared by the CSV reader and the JSON writer, so the two
// cannot drift apart.
val timestampPattern = "yyyy-MM-dd'T'HH:mm:ssX"

// Put the sample data on disk, then load it back as a DataFrame.
// Trailing dots keep each chain a single statement when the script is
// pasted into the shell line by line.
writeString(input)
val df = spark.read.
  format("csv").
  schema(schema).
  option("header", "true").
  option("timestampFormat", timestampPattern).
  option("delimiter", "|").
  load(csvfile)

df.printSchema()
df.show(false)

// Save the rows as JSON. "overwrite" replaces any output left at `jsonfile`
// by an earlier run instead of failing because the path already exists.
df.write.
  mode("overwrite").
  option("timestampFormat", timestampPattern).
  json(jsonfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment