Skip to content

Instantly share code, notes, and snippets.

@fish8
Created November 12, 2021 07:30
Show Gist options
  • Save fish8/8264b4034860cf2f8d5551f20851e773 to your computer and use it in GitHub Desktop.
Save fish8/8264b4034860cf2f8d5551f20851e773 to your computer and use it in GitHub Desktop.
read time matrix
/** Typed row shape used to decode the time-matrix dataset via `dataFrame.as[TimeMatrixCase]`.
  *
  * NOTE(review): the field names (including the snake_case ones) presumably mirror the
  * column names of the parquet data being decoded — confirm before renaming any of them.
  *
  * @param userId                identifier of the user the row belongs to
  * @param windowDateList        list of date strings (presumably the dates of the window — TODO confirm)
  * @param sourceCellIdPlaceId   string-keyed map to an integer id (presumably cell id -> place id — TODO confirm)
  * @param timeMatrix            nested list of integers holding the matrix values
  * @param time_map              string-keyed integer map
  * @param weekdayDaySeconds     string-keyed integer map (presumably seconds per key on weekday daytime — TODO confirm)
  * @param weekdayEveningSeconds string-keyed integer map (presumably seconds per key on weekday evenings — TODO confirm)
  * @param weekendDaySeconds     string-keyed integer map (presumably seconds per key on weekend daytime — TODO confirm)
  * @param weekendEveningSeconds string-keyed integer map (presumably seconds per key on weekend evenings — TODO confirm)
  * @param index_map             string-keyed map to nested lists of (String, Int) pairs
  * @param indoor_map            string-keyed map to nested lists of ((String, Int), List[String]) pairs
  */
case class TimeMatrixCase(
  userId: Long,
  windowDateList: List[String],
  sourceCellIdPlaceId: Map[String, Int],
  timeMatrix: List[List[Int]],
  time_map: Map[String, Int],
  weekdayDaySeconds: Map[String, Int],
  weekdayEveningSeconds: Map[String, Int],
  weekendDaySeconds: Map[String, Int],
  weekendEveningSeconds: Map[String, Int],
  index_map: Map[String, List[List[(String, Int)]]],
  indoor_map: Map[String, List[List[((String, Int), List[String])]]]
)
// Read the parquet part file(s) matching the glob and view the rows as typed TimeMatrixCase records.
val dataFrame = spark.read.parquet(
  "hdfs://10.244.12.215:8020/user/ss_deploy/workspace/ss-ng/beijing/hpu/ulm/time_matrix/0/2021/07/08/csv_1_1/part-00453*"
)
val timeMatrix = dataFrame.as[TimeMatrixCase].rdd

// For every record, produce (sorted index_map keys, sorted sourceCellIdPlaceId keys, whether the
// two sorted key lists are equal). The weekday/weekend key unions that used to be computed here
// were never part of the returned tuple, so they have been dropped to avoid per-record work.
// No action is invoked on `result` in this snippet.
val result = timeMatrix.map { record =>
  val indexKeys = record.index_map.keySet.toList.sorted
  val cellKeys = record.sourceCellIdPlaceId.keySet.toList.sorted
  (indexKeys, cellKeys, indexKeys == cellKeys)
}
// Load the parquet part file(s) matching the glob as typed TimeMatrixCase records,
// then print the userId of the first ten records, one per line.
val dataFrame = spark.read.parquet(
  "hdfs://10.244.12.215:8020/user/ss_deploy/workspace/ss-ng/beijing/hpu/ulm/time_matrix/0/2021/07/08/csv_1_1/part-00453*"
)
val timeMatrix = dataFrame.as[TimeMatrixCase].rdd
for (id <- timeMatrix.map(_.userId).take(10)) println(id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment