Skip to content

Instantly share code, notes, and snippets.

@marcintustin
Created January 1, 2018 20:00
Show Gist options
  • Save marcintustin/2d5f829b83a5c631a1506dbc7081bfc9 to your computer and use it in GitHub Desktop.
Save marcintustin/2d5f829b83a5c631a1506dbc7081bfc9 to your computer and use it in GitHub Desktop.
A somewhat-working attempt at some generic Spark code
/**
 * Columns shared by every time-segmented record type in this file.
 *
 * `matchViewershipWithSchedules` pairs rows on `date`, `station_id` and
 * `segment_time`, then compares `start_time` / `end_time` to discard pairs
 * whose intervals do not overlap.
 */
trait TimeColumns {
  // Equality keys used when pairing rows.
  val date: Date
  val station_id: Integer // java.lang.Integer; presumably boxed so the column can be null -- TODO confirm
  val segment_time: Timestamp

  // Bounds of the interval a row covers.
  val start_time: Timestamp
  val end_time: Timestamp
}
/**
 * A viewership row: the [[TimeColumns]] fields plus one `viewership_property` string.
 *
 * The parent trait is introduced with `extends`; `with` is only valid for
 * second and later parents, so `) with TimeColumns` does not compile.
 */
case class Viewership(
    date: Date,
    segment_time: Timestamp,
    station_id: Integer,
    start_time: Timestamp,
    end_time: Timestamp,
    viewership_property: String // meaning is not visible in this file
) extends TimeColumns
/**
 * A schedule row: a `program_id` followed by the [[TimeColumns]] fields.
 *
 * The parent trait is introduced with `extends`; `with` is only valid for
 * second and later parents, so `) with TimeColumns` does not compile.
 */
case class Schedule(
    program_id: Integer,
    date: Date,
    segment_time: Timestamp,
    station_id: Integer,
    start_time: Timestamp,
    end_time: Timestamp
) extends TimeColumns
/**
 * A viewership row with a `program_id` attached.
 *
 * The fields are those of `Viewership` followed by `program_id`, which are the
 * columns `matchViewershipWithSchedules` selects when given `Viewership` input,
 * so this looks like the intended `RESULT` type for that call.
 *
 * The parent trait is introduced with `extends`; `with` is only valid for
 * second and later parents, so `) with TimeColumns` does not compile.
 */
case class ViewershipWithSchedule(
    date: Date,
    segment_time: Timestamp,
    station_id: Integer,
    start_time: Timestamp,
    end_time: Timestamp,
    viewership_property: String,
    program_id: Integer
) extends TimeColumns
/**
 * Attaches the `program_id` of each matching schedule row to the viewership rows.
 *
 * Rows are paired with an inner join on `date`, `segment_time` and `station_id`.
 * A pair is kept only when the schedule starts before the viewership row ends
 * and ends after the viewership row starts.
 *
 * @tparam T      row type of `viewership`; must provide the [[TimeColumns]] fields
 * @tparam RESULT row type the output is decoded as; it has to be a type Spark can
 *                encode whose fields match the columns of `T` plus `program_id`
 * @param viewership rows to enrich
 * @param schedules  schedule rows supplying `program_id`
 * @param ev         type tag for `RESULT`, from which the Spark encoder is built
 * @return one row per (viewership, schedule) pair that satisfies the conditions above
 */
def matchViewershipWithSchedules[T <: TimeColumns, RESULT](
    viewership: Dataset[T], schedules: Dataset[Schedule])(
    // RESULT is erased at runtime; the TypeTag carries the type information
    // needed to build a Spark encoder for it inside the body.
    implicit ev: TypeTag[RESULT]): Dataset[RESULT] = {
  // Take the session from the input dataset so the `$"..."` column
  // interpolator can be imported without relying on any outer variable.
  val session = viewership.sparkSession
  import session.implicits._

  // `session.implicits._` can only derive encoders for concrete primitives and
  // `Product` subtypes, so it cannot supply one for the unbounded RESULT.
  // Build the encoder explicitly from the TypeTag instead.
  val resultEncoder =
    org.apache.spark.sql.catalyst.encoders.ExpressionEncoder[RESULT]()

  viewership.toDF.alias("viewership")
    .join(
      schedules.toDF.alias("schedules"),
      $"viewership.date" === $"schedules.date"
        && $"viewership.segment_time" === $"schedules.segment_time"
        && $"viewership.station_id" === $"schedules.station_id")
    // filter on start and end time to avoid spurious matches
    .filter(
      $"schedules.start_time" < $"viewership.end_time"
        && $"schedules.end_time" > $"viewership.start_time")
    // DANGER! if you pass in a ViewershipWithSchedule as the viewership, this will break:
    // `viewership.*` would already contain a program_id column, so it would be selected twice.
    .selectExpr("viewership.*", "schedules.program_id")
    .as[RESULT](resultEncoder)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment