Skip to content

Instantly share code, notes, and snippets.

@fclesio
Created August 6, 2024 12:54
Show Gist options
  • Save fclesio/1b9cf674afdaffdd9f897e3ab985e00b to your computer and use it in GitHub Desktop.
Save fclesio/1b9cf674afdaffdd9f897e3ab985e00b to your computer and use it in GitHub Desktop.
def transform_timestamp_to_features(timestamp):
    """Expand one timestamp into a flat dictionary of calendar and clock features.

    The input is parsed with ``pd.to_datetime`` and every feature is derived
    from the resulting ``Timestamp``. Flags are encoded as ``0``/``1``
    integers; cyclic encodings are NumPy floats.

    Args:
        timestamp: Any scalar accepted by ``pd.to_datetime`` (string,
            ``datetime``, ``np.datetime64``, ``pd.Timestamp``, ...).

    Returns:
        dict: Feature name -> value. Key order is stable, so it can be used
        directly as a column order.

    Raises:
        ValueError: If the input parses to ``NaT`` (missing timestamp).
    """
    dt = pd.to_datetime(timestamp)
    if dt is pd.NaT:
        # Fail with an explicit message instead of the opaque
        # "NaTType does not support isocalendar" raised further down.
        raise ValueError("cannot derive features from a missing timestamp (NaT)")

    # Read each component once; they are reused by many features below.
    month, day = dt.month, dt.day
    hour, minute, second = dt.hour, dt.minute, dt.second
    weekday = dt.weekday()  # Monday == 0 ... Sunday == 6
    is_weekday = weekday < 5
    week_of_month = (day - 1) // 7 + 1
    in_work_hours = is_weekday and 9 <= hour < 17
    minute_of_day = hour * 60 + minute
    two_pi = 2 * np.pi

    return {
        # Date features
        "year": dt.year,
        "quarter": dt.quarter,
        "month": month,
        # ISO-8601 week number; indexing (not ``.week``) also works where
        # ``isocalendar()`` returns a plain tuple (Python < 3.9).
        "week": dt.isocalendar()[1],
        "day": day,
        "doy": dt.dayofyear,
        "weekday": weekday,
        "is_business_day": int(is_weekday),
        "dow": weekday,
        "wom": week_of_month,
        "days_in_month": dt.days_in_month,
        # 1: Winter (Dec-Feb), 2: Spring, 3: Summer, 4: Fall
        "season": (month % 12 // 3) + 1,
        "is_leap_year": int(dt.is_leap_year),
        # Time features
        "hour": hour,
        "minute": minute,
        "second": second,
        # Binary flags for specific times of day
        "is_morning": int(6 <= hour < 12),
        "is_afternoon": int(12 <= hour < 18),
        "is_evening": int(18 <= hour < 24),
        "is_night": int(0 <= hour < 6),
        "is_lunch_time": int(hour == 12 and minute < 30),
        "is_midnight": int(hour == 0 and minute == 0),
        "is_early_morning": int(5 <= hour < 9),
        "is_late_night": int(21 <= hour < 24),
        "is_midday": int(hour == 12 and minute == 0),
        "is_morning_commute": int(hour == 8 and minute <= 30),
        "is_evening_commute": int(hour == 17 and minute >= 30),
        "is_first_hour_of_day": int(hour == 0),
        "is_last_hour_of_day": int(hour == 23),
        # Periods of the week
        "is_weekend": int(weekday >= 5),
        "is_weekday_morning_rush": int(is_weekday and hour == 8),
        "is_weekday_evening_rush": int(is_weekday and hour == 17),
        "is_weekday_start": int(is_weekday and hour == 9 and minute == 0),
        "is_weekday_end": int(is_weekday and hour == 17 and minute == 0),
        "is_weekend_start": int(weekday == 4 and hour == 18),
        "is_weekend_end": int(weekday == 6 and hour == 23 and minute == 59),
        "is_week_start": int(weekday == 0),
        "is_week_end": int(weekday == 4),  # Friday, i.e. end of the work week
        # Periods of the year
        "is_month_start": int(dt.is_month_start),
        "is_month_end": int(dt.is_month_end),
        "is_quarter_start": int(dt.is_quarter_start),
        "is_quarter_end": int(dt.is_quarter_end),
        "is_year_start": int(dt.is_year_start),
        "is_year_end": int(dt.is_year_end),
        # Work and non-work hours (Mon-Fri, 09:00-16:59)
        "is_work_hour": int(in_work_hours),
        "is_non_work_hour": int(not in_work_hours),
        # Periods of the hour
        "is_first_minute_of_hour": int(minute == 0),
        "is_last_minute_of_hour": int(minute == 59),
        "is_half_hour": int(minute == 30),
        "is_quarter_past": int(minute == 15),
        "is_quarter_to": int(minute == 45),
        # Aggregated and cyclic features
        "minute_of_day": minute_of_day,
        "second_of_day": minute_of_day * 60 + second,
        "minute_sin": np.sin(two_pi * minute / 60),
        "minute_cos": np.cos(two_pi * minute / 60),
        "hour_sin": np.sin(two_pi * hour / 24),
        "hour_cos": np.cos(two_pi * hour / 24),
        "week_of_month": week_of_month,  # same value as "wom"
        "dayofweek_sin": np.sin(two_pi * weekday / 7),
        "dayofweek_cos": np.cos(two_pi * weekday / 7),
        # Additional time features
        "minute_of_half_hour": minute % 30,
        "minute_of_quarter": minute % 15,
        "second_of_hour": second + minute * 60,
        "is_am": int(hour < 12),
        "is_pm": int(hour >= 12),
        "is_late_afternoon": int(15 <= hour < 18),
        "is_evening_peak": int(18 <= hour < 20),
        "is_early_evening": int(18 <= hour < 21),
    }
def expand_date_columns(df, column_name):
    """Append timestamp-derived feature columns to a DataFrame.

    Every value of ``df[column_name]`` is passed through
    ``transform_timestamp_to_features`` and the resulting dictionaries become
    new columns, aligned on ``df``'s index.

    Args:
        df: Input DataFrame; it is not modified.
        column_name: Label of the column holding the timestamps.

    Returns:
        A new DataFrame with the original columns followed by one column per
        generated feature. Integer features keep an integer dtype.

    Note:
        Feature columns are concatenated as-is; if ``df`` already has a column
        with the same label as a feature (e.g. ``"year"``), the result will
        contain duplicate column labels.
    """
    # Build the feature frame in one shot from the list of row dicts.
    # ``Series.apply(pd.Series)`` creates one Series per row, which is slow
    # and upcasts every integer feature/flag to float64.
    feature_rows = [
        transform_timestamp_to_features(value) for value in df[column_name]
    ]
    features_df = pd.DataFrame(feature_rows, index=df.index)
    return pd.concat([df, features_df], axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment