Skip to content

Instantly share code, notes, and snippets.

@khajavi
Last active July 27, 2016 11:49
Show Gist options
  • Select an option

  • Save khajavi/986fd096a234039dbf6ee69e68c91476 to your computer and use it in GitHub Desktop.

Select an option

Save khajavi/986fd096a234039dbf6ee69e68c91476 to your computer and use it in GitHub Desktop.
Example of using the timestamp data type in an Apache Spark Dataset
import java.sql.Timestamp
import java.text.SimpleDateFormat
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Single-field row type holding one timestamp.
 *
 * @param time the timestamp value carried by this record
 */
case class Record(time: Timestamp)
/**
 * Minimal example of storing `java.sql.Timestamp` values in a Spark Dataset
 * and filtering on them.
 *
 * The entry point is an explicit `main` method rather than `extends App`:
 * `scala.App` defers the object body via `DelayedInit`, and Spark's
 * documentation warns that subclasses of `scala.App` may not work correctly.
 */
object TimeStampExample {

  /**
   * Parses a `yyyy-MM-dd` date string into a `java.sql.Timestamp`.
   *
   * The string is interpreted in the JVM's default time zone.
   *
   * @param s date text such as `"2016-03-15"`
   * @return the timestamp corresponding to the parsed date
   * @throws java.text.ParseException if `s` cannot be parsed with the pattern
   */
  def parse(s: String): Timestamp = {
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    new Timestamp(dateFormat.parse(s).getTime)
  }

  /**
   * Builds a two-row Dataset of [[Record]]s, prints the rows strictly before
   * 2016-03-16, then prints the whole Dataset.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("app").setMaster("local"))
    try {
      val sqlContext = new SQLContext(sc)
      import sqlContext.implicits._

      val log = Seq("2016-03-15", "2016-03-16")
      val ds = log.map(i => Record(parse(i))).toDS()

      // Parse the cutoff once instead of re-parsing it for every row in the filter.
      val cutoff = parse("2016-03-16")
      ds.filter(_.time.before(cutoff)).show()
      ds.show()
    } finally {
      // Always release the context, even if building or showing the Dataset fails.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment