Last active
October 6, 2022 19:57
-
-
Save Eligijus112/3fd9f46fa2f3c2de12e2311d16e59ec0 to your computer and use it in GitHub Desktop.
Date conversion and feature engineering for NYC cab data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from datetime import datetime | |
| import numpy as np | |
| import re | |
| # To datetime conversion | |
def to_datetime(x: str) -> datetime:
    """
    Converts a string to a datetime object

    An example of the string is 2010-02-02 17:24:55; a ' UTC' marker in the
    string (e.g. 2010-02-02 17:24:55 UTC) is dropped before parsing.

    Parameters
    ----------
    x : str or datetime
        The value to convert. datetime instances (including subclasses)
        are returned unchanged.

    Returns
    -------
    datetime
        The parsed value. When x does not match '%Y-%m-%d %H:%M:%S' (or is
        not a string at all) a message is printed and the result of
        pd.to_datetime(x) is returned instead.
    """
    # Already a datetime: nothing to convert
    if isinstance(x, datetime):
        return x

    try:
        # Dropping the UTC part from the date strings. x is rebound on
        # purpose so that the fallback below also sees the stripped value.
        x = re.sub(' UTC', '', x)
        return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    except (TypeError, ValueError):
        # TypeError: x is not a string; ValueError: the format does not match.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        print(f"Error converting {x} to datetime")
        return pd.to_datetime(x)
def create_date_vars(
    d: pd.DataFrame,
    date_var: str = 'pickup_datetime'
) -> pd.DataFrame:
    """
    Creates the datetime variables

    Creates the following columns
    * pickup_dayofweek - The day of the week at pickup time
    * pickup_hour - The hour of the day at pickup time
    * pickup_dayofyear - The day of the year at pickup time
    * pickup_hour_sin, pickup_hour_cos - The sine and cosine of the hour of the day
    * pickup_dayofyear_sin, pickup_dayofyear_cos - The sine and cosine of the day of the year

    Parameters
    ----------
    d : pd.DataFrame
        The frame holding the date column. It is modified in place: the
        date column is overwritten with parsed datetimes and the columns
        listed above are added.
    date_var : str
        The name of the column holding the pickup timestamps.

    Returns
    -------
    pd.DataFrame
        The same object as d, returned for convenience.
    """
    # Parsing the date column. Columns that are already datetime-typed are
    # left as they are (to_datetime would return every element unchanged);
    # everything else is parsed value by value and coerced to a datetime
    # dtype so that the .dt accessor works even for an empty frame.
    if not pd.api.types.is_datetime64_any_dtype(d[date_var]):
        d[date_var] = pd.to_datetime([to_datetime(x) for x in d[date_var]])

    pickup = d[date_var].dt

    # Inferring the day of the week from the date column
    d['pickup_dayofweek'] = pickup.dayofweek

    # Inferring the hour of the day from the date column
    d['pickup_hour'] = pickup.hour

    # Creating a new variable for the day of the year
    d['pickup_dayofyear'] = pickup.dayofyear

    # Cyclical encoding: mapping the hour and the day of the year onto a
    # circle so that values at the wrap-around are close to each other.
    # The hour takes 24 distinct values (0..23), hence the period is 24;
    # a period of 23 would place hour 0 and hour 23 on the same point.
    hour_angle = 2 * np.pi * d['pickup_hour'] / 24.0
    d['pickup_hour_sin'] = np.sin(hour_angle)
    d['pickup_hour_cos'] = np.cos(hour_angle)

    # The period of 365 is kept as is; in leap years day 366 therefore
    # shares its encoding with day 1.
    dayofyear_angle = 2 * np.pi * d['pickup_dayofyear'] / 365.0
    d['pickup_dayofyear_sin'] = np.sin(dayofyear_angle)
    d['pickup_dayofyear_cos'] = np.cos(dayofyear_angle)

    return d
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment