Created
August 6, 2024 12:54
-
-
Save fclesio/1b9cf674afdaffdd9f897e3ab985e00b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def transform_timestamp_to_features(timestamp):
    """Expand a single timestamp into a flat dictionary of calendar/clock features.

    Args:
        timestamp: Anything ``pd.to_datetime`` can convert to a single
            ``pandas.Timestamp`` (e.g. an ISO string, ``datetime`` or
            ``Timestamp``).

    Returns:
        dict: Feature name -> value. Flags are ``int`` 0/1, the cyclic
        encodings are floats produced by ``np.sin`` / ``np.cos``, and the
        remaining entries are integer calendar/clock components. Keys are
        inserted in a fixed order, so the resulting column order is stable.

    Notes:
        Weekdays follow the pandas convention (Monday == 0, Sunday == 6).
        All "rush", "commute" and "work hour" windows are fixed clock ranges
        hard-coded below; no holiday calendar is consulted.
    """
    # Convert timestamp to datetime
    dt = pd.to_datetime(timestamp)

    # Values reused by many features below; read them once.
    hour = dt.hour
    minute = dt.minute
    weekday = dt.weekday()
    is_weekday = weekday < 5
    in_work_hours = is_weekday and 9 <= hour < 17
    week_of_month = (dt.day - 1) // 7 + 1

    # Initialize a dictionary to store features
    features = {}

    # Date Features
    features["year"] = dt.year
    features["quarter"] = dt.quarter
    features["month"] = dt.month
    features["week"] = dt.isocalendar().week  # ISO-8601 week number
    features["day"] = dt.day
    features["doy"] = dt.dayofyear
    features["weekday"] = weekday
    features["is_business_day"] = int(is_weekday)
    features["dow"] = dt.dayofweek
    features["wom"] = week_of_month
    features["days_in_month"] = dt.days_in_month
    # Dec-Feb -> 1 (Winter), Mar-May -> 2 (Spring),
    # Jun-Aug -> 3 (Summer), Sep-Nov -> 4 (Fall)
    features["season"] = (dt.month % 12 // 3) + 1
    features["is_leap_year"] = int(dt.is_leap_year)

    # Time Features
    features["hour"] = hour
    features["minute"] = minute
    features["second"] = dt.second

    # Binary flags for specific times and dates
    features["is_morning"] = int(6 <= hour < 12)
    features["is_afternoon"] = int(12 <= hour < 18)
    features["is_evening"] = int(18 <= hour < 24)
    features["is_night"] = int(0 <= hour < 6)
    features["is_lunch_time"] = int(hour == 12 and minute < 30)
    features["is_midnight"] = int(hour == 0 and minute == 0)
    features["is_early_morning"] = int(5 <= hour < 9)
    features["is_late_night"] = int(21 <= hour < 24)
    features["is_midday"] = int(hour == 12 and minute == 0)
    features["is_morning_commute"] = int(hour == 8 and minute <= 30)
    features["is_evening_commute"] = int(hour == 17 and minute >= 30)
    features["is_first_hour_of_day"] = int(hour == 0)
    features["is_last_hour_of_day"] = int(hour == 23)

    # Periods of the week
    features["is_weekend"] = int(weekday >= 5)
    features["is_weekday_morning_rush"] = int(is_weekday and hour == 8)
    features["is_weekday_evening_rush"] = int(is_weekday and hour == 17)
    features["is_weekday_start"] = int(is_weekday and hour == 9 and minute == 0)
    features["is_weekday_end"] = int(is_weekday and hour == 17 and minute == 0)
    features["is_weekend_start"] = int(weekday == 4 and hour == 18)
    features["is_weekend_end"] = int(weekday == 6 and hour == 23 and minute == 59)
    features["is_week_start"] = int(weekday == 0)
    features["is_week_end"] = int(weekday == 4)

    # Periods of the year
    features["is_month_start"] = int(dt.is_month_start)
    features["is_month_end"] = int(dt.is_month_end)
    features["is_quarter_start"] = int(dt.is_quarter_start)
    features["is_quarter_end"] = int(dt.is_quarter_end)
    features["is_year_start"] = int(dt.is_year_start)
    features["is_year_end"] = int(dt.is_year_end)

    # Work and non-work hours
    features["is_work_hour"] = int(in_work_hours)
    features["is_non_work_hour"] = int(not in_work_hours)

    # Periods of the hour
    features["is_first_minute_of_hour"] = int(minute == 0)
    features["is_last_minute_of_hour"] = int(minute == 59)
    features["is_half_hour"] = int(minute == 30)
    features["is_quarter_past"] = int(minute == 15)
    features["is_quarter_to"] = int(minute == 45)

    # Aggregated and cyclic features
    features["minute_of_day"] = hour * 60 + minute
    features["second_of_day"] = features["minute_of_day"] * 60 + dt.second
    # sin/cos pairs place each periodic value on the unit circle so that the
    # wrap-around (e.g. 23h -> 0h) is a small step rather than a jump.
    features["minute_sin"] = np.sin(2 * np.pi * minute / 60)
    features["minute_cos"] = np.cos(2 * np.pi * minute / 60)
    features["hour_sin"] = np.sin(2 * np.pi * hour / 24)
    features["hour_cos"] = np.cos(2 * np.pi * hour / 24)
    features["week_of_month"] = week_of_month  # same value as "wom"
    features["dayofweek_sin"] = np.sin(2 * np.pi * weekday / 7)
    features["dayofweek_cos"] = np.cos(2 * np.pi * weekday / 7)

    # Additional time features
    features["minute_of_half_hour"] = minute % 30
    features["minute_of_quarter"] = minute % 15
    features["second_of_hour"] = dt.second + minute * 60
    features["is_am"] = int(hour < 12)
    features["is_pm"] = int(hour >= 12)
    features["is_late_afternoon"] = int(15 <= hour < 18)
    features["is_evening_peak"] = int(18 <= hour < 20)
    features["is_early_evening"] = int(18 <= hour < 21)

    return features
def expand_date_columns(df, column_name):
    """Return ``df`` with timestamp-derived feature columns appended.

    Each value of ``df[column_name]`` is passed through
    ``transform_timestamp_to_features`` and the resulting dictionaries are
    expanded into one column per feature, then concatenated to the right of
    the original columns. The input frame is not modified.

    Args:
        df: Source DataFrame.
        column_name: Name of the column in ``df`` holding the timestamps.

    Returns:
        A new DataFrame containing the original columns followed by the
        feature columns.
    """
    # One feature dictionary per row, aligned on df's index.
    per_row_features = df[column_name].apply(transform_timestamp_to_features)
    # Expand each dictionary into columns (dict keys become column names).
    features_df = per_row_features.apply(pd.Series)
    return pd.concat([df, features_df], axis=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment