Skip to content

Instantly share code, notes, and snippets.

@nkipa
Forked from atriptoparadise/Stacking Model
Created March 20, 2020 16:48
Show Gist options
  • Save nkipa/b52e6275cbf2a0efec56e6040decf52b to your computer and use it in GitHub Desktop.
def modelingFor5(Account):
    """Benchmark five regressors on one account's monthly NetAmount series.

    Builds a lag-feature matrix (lags 7..47) from the account's daily-summed
    NetAmount, holds out the last 30% chronologically, fits Linear, RidgeCV,
    LassoCV, XGBoost and LightGBM models, and scores each with MAPE. A simple
    weighted blend ("stacking") of the five test predictions is scored too.

    Relies on module-level objects defined elsewhere in the notebook:
    `df`, `scaler`, `tscv`, `timeseries_train_test_split`,
    `mean_absolute_percentage_error`, plus sklearn/xgboost/lightgbm imports.

    Parameters
    ----------
    Account : value matched against df['Account'].

    Returns
    -------
    pd.DataFrame
        One row (index = Account) with columns
        ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm',
         'stack_error', 'avg_error', 'min_error'].

    Side effect: also assigns the result to the module-level `df_final`
    (preserved from the original code).
    """
    df_model = df[df['Account'] == Account]
    df_timeseries = pd.DataFrame(
        df_model.groupby(['Date'])['NetAmount'].sum()
    ).sort_index(axis=0)
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]
    # Drop the final period (data after 2019-06 is incomplete).
    data = data.loc[data.index[:-1]]
    # Lags of the target from 7 up to 47 steps back (range end is exclusive).
    for i in range(7, 48):
        data["lag_{}".format(i)] = data.y.shift(i)
    y = data.dropna().y
    X = data.dropna().drop(['y'], axis=1)
    # Reserve the chronologically-last 30% of observations for testing.
    X_train, X_test, y_train, y_test = timeseries_train_test_split(
        X, y, test_size=0.3
    )
    # Scale features; fit the scaler on train only to avoid leakage.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    def _fit_and_score(model):
        """Fit `model` on scaled train data; return (test_predictions, MAPE)."""
        model.fit(X_train_scaled, y_train)
        prediction = model.predict(X_test_scaled)
        # BUG FIX: MAPE normalizes by the actual values, so y_true must be
        # the first argument (sklearn signature: (y_true, y_pred)). The
        # original passed (prediction, y_test), dividing by predictions.
        return prediction, mean_absolute_percentage_error(y_test, prediction)

    prediction1, error_linear = _fit_and_score(LinearRegression())
    prediction2, error_ridge = _fit_and_score(RidgeCV(cv=tscv))
    prediction3, error_lasso = _fit_and_score(LassoCV(cv=tscv))
    # 'reg:squarederror' is the documented replacement for the deprecated
    # alias 'reg:linear' (same loss, no behavior change).
    prediction4, error_xgb = _fit_and_score(XGBRegressor(
        base_score=0.5, booster='gbtree', colsample_bylevel=1,
        colsample_bynode=1, colsample_bytree=0.3, gamma=0,
        importance_type='gain', learning_rate=0.1, max_delta_step=0,
        max_depth=4, min_child_weight=1, missing=None, n_estimators=100,
        n_jobs=1, nthread=None, objective='reg:squarederror', random_state=0,
        reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
        silent=None, subsample=0.5, verbosity=1))
    # LightGBM uses its own Dataset/train API rather than the sklearn one.
    lgb_train = lgb.Dataset(X_train_scaled, y_train)
    lgb_eval = lgb.Dataset(X_test_scaled, y_test, reference=lgb_train)
    lightgbm_params = {'boosting_type': 'gbdt',
                       'colsample_bytree': 0.65,
                       'learning_rate': 0.001,
                       'n_estimators': 20,
                       'num_leaves': 3,
                       'reg_alpha': 0.5,
                       'reg_lambda': 0.5,
                       'subsample': 0.7}
    gbm = lgb.train(lightgbm_params, lgb_train,
                    num_boost_round=10, valid_sets=lgb_eval)
    prediction5 = gbm.predict(X_test_scaled)
    error_lightgbm = mean_absolute_percentage_error(y_test, prediction5)

    # Fixed-weight blend of the five test predictions (weights sum to 1).
    stack_prediction = (prediction1 * 0.1 + prediction2 * 0.075 +
                        prediction3 * 0.075 + prediction4 * 0.35 +
                        prediction5 * 0.4)
    stack_error = mean_absolute_percentage_error(y_test, stack_prediction)

    errors = [error_linear, error_ridge, error_lasso,
              error_xgb, error_lightgbm]
    l1 = errors + [stack_error, sum(errors) / 5, min(errors)]
    # Preserved from the original: the result is also exposed as a global.
    global df_final
    df_final = pd.DataFrame({Account: l1}).T
    df_final.columns = ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm',
                        'stack_error', 'avg_error', 'min_error']
    return df_final
def getMetics(Accountlist):
    """Run modelingFor5 for every account and stack the per-account rows.

    Parameters
    ----------
    Accountlist : sequence of account identifiers to model.

    Returns
    -------
    pd.DataFrame
        One row per account, columns as produced by modelingFor5.
        Empty DataFrame when Accountlist is empty.
    """
    # BUG FIX: the original seeded the loop with modelingFor5(l1[0]) — `l1`
    # is a local variable inside modelingFor5, so this raised NameError.
    # Also replaces DataFrame.append (deprecated, removed in pandas 2.0)
    # with a single pd.concat over all per-account frames.
    frames = [modelingFor5(account) for account in Accountlist]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment