Skip to content

Instantly share code, notes, and snippets.

@asifr
Last active December 3, 2020 16:48
Show Gist options
  • Select an option

  • Save asifr/bc6ace583504b2faecc4f079c5650c36 to your computer and use it in GitHub Desktop.

Select an option

Save asifr/bc6ace583504b2faecc4f079c5650c36 to your computer and use it in GitHub Desktop.
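Label each row of a Spark DataFrame with a 1/0 flag: whether an event is in progress X hours from now (eventXHrFromNow) or occurs within an upcoming X-hour window (eventWithinXHrs).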
import pyspark.sql.functions as F
from pyspark.sql import Window
from pyspark.sql.column import Column
def overlaps(start_first, end_first, start_second, end_second):
    """Return whether two closed intervals share at least one point.

    The intervals are ``[start_first, end_first]`` and
    ``[start_second, end_second]``. Both ends are inclusive, so intervals
    that merely touch at an endpoint count as overlapping.

    The two comparisons are combined with ``&`` rather than ``and`` so the
    function also works on operands that overload the operator to build an
    expression (in this file it is called with Spark ``Column`` objects).

    Args:
        start_first: Start of the first interval.
        end_first: End of the first interval.
        start_second: Start of the second interval.
        end_second: End of the second interval.

    Returns:
        The result of ``(end_first >= start_second) & (end_second >=
        start_first)``; its type depends on the operand types.
    """
    return (end_first >= start_second) & (end_second >= start_first)
def eventXHrFromNow(hours: int, time_col: str, start_col: str, end_col: str) -> Column:
    """Build a 1/0 label: is the event in progress ``hours`` after the row time?

    The label is 1 when ``time_col + hours`` falls inside the closed
    interval ``[start_col, end_col]`` and 0 otherwise.

    NOTE(review): assumes ``time_col``, ``start_col`` and ``end_col`` are
    numeric columns expressed in the same unit as ``hours`` -- confirm
    against the calling DataFrame.

    Args:
        hours: Offset added to the value of ``time_col``.
        time_col: Name of the column holding each row's reference time.
        start_col: Name of the column holding the event start.
        end_col: Name of the column holding the event end.

    Returns:
        A Spark column expression evaluating to the literal 1 or 0.
    """
    xhrs_from_now = F.col(time_col) + F.lit(hours)
    # Column.between is inclusive on both ends: (x >= lower) & (x <= upper).
    in_event = xhrs_from_now.between(F.col(start_col), F.col(end_col))
    return F.when(in_event, F.lit(1)).otherwise(F.lit(0))
def eventWithinXHrs(
    hours: int,
    time_col: str,
    start_col: str,
    end_col: str,
    *,
    lead_hours: int = 1,
) -> Column:
    """Build a 1/0 label: does the event overlap an upcoming look-ahead window?

    The look-ahead window is the closed interval
    ``[time_col + lead_hours, time_col + lead_hours + hours]``. The label is
    1 when that window overlaps ``[start_col, end_col]`` (see ``overlaps``)
    and 0 otherwise.

    NOTE(review): assumes ``time_col``, ``start_col`` and ``end_col`` are
    numeric columns expressed in the same unit as ``hours`` -- confirm
    against the calling DataFrame.

    Args:
        hours: Length of the look-ahead window.
        time_col: Name of the column holding each row's reference time.
        start_col: Name of the column holding the event start.
        end_col: Name of the column holding the event end.
        lead_hours: Gap between the row time and the start of the window.
            Defaults to 1, the offset that was previously hard-coded, so
            existing callers get the same result.

    Returns:
        A Spark column expression evaluating to the literal 1 or 0.
    """
    now = F.col(time_col)
    window_start = now + F.lit(lead_hours)
    window_end = now + F.lit(lead_hours + hours)
    event_in_window = overlaps(
        window_start,
        window_end,
        F.col(start_col),
        F.col(end_col),
    )
    return F.when(event_in_window, F.lit(1)).otherwise(F.lit(0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment