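# Assumed setup (not part of the original gist): a minimal sketch of the imports and
# helpers the functions below appear to rely on. The definitions of `df`, `scaler`,
# `tscv`, `mean_absolute_percentage_error`, and `timeseries_train_test_split` are
# plausible reconstructions, not the author's exact code.
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

scaler = StandardScaler()           # assumption: one scaler shared by all models
tscv = TimeSeriesSplit(n_splits=5)  # assumption: time-series-aware CV splitter

# `df` is assumed to be a transaction-level DataFrame with at least the columns
# 'Account', 'Date', and 'NetAmount', e.g.:
# df = pd.read_csv('transactions.csv', parse_dates=['Date'])

def mean_absolute_percentage_error(y_true, y_pred):
    # MAPE in percent; a common hand-rolled helper for older sklearn versions
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def timeseries_train_test_split(X, y, test_size):
    # Chronological split: the last `test_size` fraction of rows becomes the test set
    test_index = int(len(X) * (1 - test_size))
    X_train, X_test = X.iloc[:test_index], X.iloc[test_index:]
    y_train, y_test = y.iloc[:test_index], y.iloc[test_index:]
    return X_train, X_test, y_train, y_test
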
def modelingFor5(Account):
    # Build a monthly net-amount series for one account
    df_model = df[df['Account'] == Account]
    df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]
    # Drop the last row of the series (data after 2019-06)
    data = data.loc[data.index[:-1]]
    # Add lags of the target variable from 7 up to 47 steps (months) back
    for i in range(7, 48):
        data["lag_{}".format(i)] = data.y.shift(i)
    y = data.dropna().y
    X = data.dropna().drop(['y'], axis=1)
    # Reserve the final 30% of the series for testing (chronological split)
    X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)
    # Scaling
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Linear Regression
    lr = LinearRegression()
    lr.fit(X_train_scaled, y_train)
    prediction1 = lr.predict(X_test_scaled)
    error_linear = mean_absolute_percentage_error(y_test, prediction1)
    # Ridge with time-series cross-validation
    ridge = RidgeCV(cv=tscv)
    ridge.fit(X_train_scaled, y_train)
    prediction2 = ridge.predict(X_test_scaled)
    error_ridge = mean_absolute_percentage_error(y_test, prediction2)
    # Lasso with time-series cross-validation
    lasso = LassoCV(cv=tscv)
    lasso.fit(X_train_scaled, y_train)
    prediction3 = lasso.predict(X_test_scaled)
    error_lasso = mean_absolute_percentage_error(y_test, prediction3)
    # XGBoost: shallow trees with row and column subsampling
    xgb = XGBRegressor(objective='reg:squarederror', n_estimators=100,
                       learning_rate=0.1, max_depth=4, min_child_weight=1,
                       subsample=0.5, colsample_bytree=0.3, gamma=0,
                       reg_alpha=0, reg_lambda=1, random_state=0, n_jobs=1)
    xgb.fit(X_train_scaled, y_train)
    prediction4 = xgb.predict(X_test_scaled)
    error_xgb = mean_absolute_percentage_error(y_test, prediction4)
    # LightGBM
    lgb_train = lgb.Dataset(X_train_scaled, y_train)
    lgb_eval = lgb.Dataset(X_test_scaled, y_test, reference=lgb_train)
    lightgbm_params = {'boosting_type': 'gbdt',
                       'colsample_bytree': 0.65,
                       'learning_rate': 0.001,
                       'num_leaves': 3,
                       'reg_alpha': 0.5,
                       'reg_lambda': 0.5,
                       'subsample': 0.7}
    gbm = lgb.train(lightgbm_params, lgb_train, num_boost_round=20, valid_sets=[lgb_eval])
    prediction5 = gbm.predict(X_test_scaled)
    error_lightgbm = mean_absolute_percentage_error(y_test, prediction5)
    # Stacking: fixed-weight blend of the five model predictions
    stacked_prediction = (prediction1 * 0.1 + prediction2 * 0.075 + prediction3 * 0.075
                          + prediction4 * 0.35 + prediction5 * 0.4)
    stack_error = mean_absolute_percentage_error(y_test, stacked_prediction)
    l1 = [error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm, stack_error,
          (error_linear + error_ridge + error_lasso + error_xgb + error_lightgbm) / 5,
          min(error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm)]
    # One row per account, one column per error metric
    df_final = pd.DataFrame({Account: l1}).T
    df_final.columns = ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm', 'stack_error', 'avg_error', 'min_error']
    return df_final
def getMetics(Accountlist):
    # Run the modelling pipeline for every account and stack the per-account error rows
    metrics = modelingFor5(Accountlist[0])
    for i in Accountlist[1:]:
        metrics = pd.concat([metrics, modelingFor5(i)])
    return metrics
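
# Example usage (a sketch, not from the original gist): the account IDs below are
# hypothetical placeholders; in practice the list would come from the data itself,
# e.g. list(df['Account'].unique()).
if __name__ == '__main__':
    accounts = ['ACC-001', 'ACC-002', 'ACC-003']  # hypothetical account identifiers
    metrics = getMetics(accounts)
    # One row per account with MAPE columns:
    # ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm', 'stack_error', 'avg_error', 'min_error']
    print(metrics)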