Skip to content

Instantly share code, notes, and snippets.

@a-agmon
Last active February 26, 2023 07:26
Show Gist options
  • Save a-agmon/4f9347b201f0eb232581d7a53d668aef to your computer and use it in GitHub Desktop.
Save a-agmon/4f9347b201f0eb232581d7a53d668aef to your computer and use it in GitHub Desktop.
/**
 * One training example: an identifying key, its timestamp, the feature vector and the label.
 *
 * As populated by `getFeaturesDataFrame`:
 *
 * @param key      value of the `app_id` column
 * @param ts       value of the `event_hour` column
 * @param features `day_of_week`, `hour_of_day`, then `lag1` .. `lag6`, each converted to Float
 * @param label    value of the `installs` column converted to Float
 */
case class FeaturesRecord(
  key: String,
  ts: Timestamp,
  features: Seq[Float],
  label: Float
)
/**
 * Converts a raw DataFrame into a typed Dataset of [[FeaturesRecord]].
 *
 * Columns read from every row:
 *  - `app_id` (String)         -> `key`
 *  - `event_hour` (Timestamp)  -> `ts`
 *  - `installs` (Int)          -> `label`, converted to Float
 *  - `day_of_week`, `hour_of_day`, `lag1` .. `lag6` (Int) -> `features`, converted to Float,
 *    in exactly that order
 *
 * NOTE(review): `Dataset.map` is a lazy Spark transformation, so the enclosing `Try`
 * presumably only captures failures raised while the plan is being built. Per-row
 * failures (e.g. a column holding an unexpected type) would surface later, when an
 * action runs on the returned Dataset -- confirm callers account for that.
 *
 * NOTE(review): Spark documents that `Row.getAs` on a null cell of a primitive type
 * yields the type's zero value, so null Int columns would silently become 0.0f here --
 * confirm that is the intended treatment of missing lags/labels.
 *
 * @param df input frame containing the columns listed above
 * @return `Success` with the typed Dataset, or `Failure` with whatever was thrown while
 *         defining the transformation
 */
private def getFeaturesDataFrame(df: DataFrame): Try[Dataset[FeaturesRecord]] = Try {
  // Number of lagN columns (lag1 .. lag6) appended after the calendar features.
  val lagCount = 6

  // Build the case class directly instead of going through an untyped tuple and
  // string column names; the resulting columns are still key, ts, features, label.
  df.map { row =>
    val key = row.getAs[String]("app_id")
    val label = row.getAs[Int]("installs").toFloat
    val ts = row.getAs[Timestamp]("event_hour")
    val dayOfWeek = row.getAs[Int]("day_of_week").toFloat
    val hourOfDay = row.getAs[Int]("hour_of_day").toFloat
    val lagFeatures = (1 to lagCount).map(index => row.getAs[Int](s"lag${index}").toFloat)
    val features = Seq(dayOfWeek, hourOfDay) ++ lagFeatures

    FeaturesRecord(key = key, ts = ts, features = features, label = label)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment