Created
October 4, 2018 07:40
-
-
Save marnixkoops/9bfb39296f20929e1fea40c546b0cea0 to your computer and use it in GitHub Desktop.
lagged target features
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _aggregate_by_window(frame, window_col, aggregations, separator):
    """Aggregate ``actual_raw`` per product and time window.

    Groups ``frame`` by ``product_id``, ``year`` and ``window_col``, applies
    ``aggregations`` to the ``actual_raw`` column and flattens the resulting
    column labels by joining the parts of each label with ``separator``.

    Returns the aggregated frame with the group keys as ordinary columns.
    """
    aggregated = frame.groupby(['product_id', 'year', window_col])[
        'actual_raw'].agg(aggregations)
    # NOTE(review): the join assumes every column label is a tuple of strings
    # (MultiIndex columns). A plain string label would get the separator
    # inserted between its characters -- confirm the shape of `aggregations`.
    aggregated.columns = [separator.join(agg_feature)
                          for agg_feature in aggregated.columns]
    return aggregated.reset_index(drop=False)


def _lag_by_product(aggregated, key_cols):
    """Lag every column by one row within each product and backfill the gap.

    The shift is positional: each row receives the values of the previous row
    of the same ``product_id``, whatever time window that row belongs to.
    The first row of every product has no predecessor; it is imputed with the
    first known lagged observation of that product.

    ``key_cols`` are copied back unshifted from ``aggregated``.
    """
    lagged = aggregated.groupby(['product_id']).shift(1)
    # The grouped shift does not hand the key columns back intact, so restore
    # them from the unshifted frame.
    lagged[key_cols] = aggregated[key_cols]
    # bfill() is the supported spelling of fillna(method='backfill'), which is
    # deprecated on groupby objects.
    lagged = lagged.groupby(['product_id']).bfill()
    # Re-assign the grouping column: depending on the pandas version the
    # grouped fill may not return it.
    lagged['product_id'] = aggregated['product_id']
    return lagged


# Group by product / timewindow and compute aggregate features
print('[+] Generating weekly lagged product aggregation features ...')
agg_week = _aggregate_by_window(
    demand_df, 'weekofyear', num_week_lag_aggregations, "_week_lagged_")

print('[+] Generating monthly lagged product aggregation features ...')
agg_month = _aggregate_by_window(
    demand_df, 'month', num_month_lag_aggregations, "_month_lagged_")

# Lag features with 1 respective timewindow for each product
print('[+] Processing lagged aggregation features ...')
agg_lagged_week = _lag_by_product(agg_week, ['product_id', 'year', 'weekofyear'])
agg_lagged_month = _lag_by_product(agg_month, ['product_id', 'year', 'month'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment