Last active
March 20, 2020 16:48
-
-
Save atriptoparadise/5f19aabae702f6cdb34c6660012e6595 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _fit_and_predict(model, X_train, y_train, X_test):
    """Fit ``model`` on the training split and return its predictions for ``X_test``."""
    model.fit(X_train, y_train)
    return model.predict(X_test)


def modelingFor5(Account):
    """Train five regressors on one account's series and report their errors.

    The rows of the module-level ``df`` whose ``'Account'`` equals ``Account``
    are summed per ``'Date'`` into a single ``NetAmount`` series. Lagged copies
    of that series (lags 7 through 47) are used as features for a linear
    regression, RidgeCV, LassoCV, an XGBoost regressor and a LightGBM booster.
    Each model, plus a fixed-weight blend of the five, is scored on the test
    split with ``mean_absolute_percentage_error``.

    Relies on names defined outside this function: ``df``, ``scaler``,
    ``tscv``, ``timeseries_train_test_split``,
    ``mean_absolute_percentage_error``, the model classes and ``lgb``.

    Parameters
    ----------
    Account
        Value compared against ``df['Account']`` to select the rows to model.
        It also becomes the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``Account`` with the columns ``linear``,
        ``ridge``, ``lasso``, ``xgb``, ``lightgbm``, ``stack_error``,
        ``avg_error`` and ``min_error``.

    Side effects
    ------------
    Rebinds the module-level ``df_final`` to the returned frame and refits
    the shared ``scaler`` on this account's training features.
    """
    global df_final

    # Aggregate the account's rows into one value per date, in date order.
    df_model = df[df['Account'] == Account]
    df_timeseries = pd.DataFrame(
        df_model.groupby(['Date'])['NetAmount'].sum()
    ).sort_index(axis=0)
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]

    # Drop the final observation (the original note says this excludes data
    # after 2019-06 -- presumably the last period is incomplete; confirm).
    data = data.loc[data.index[:-1]]

    # Lag features: the target shifted by 7, 8, ..., 47 steps
    # (range(7, 48) excludes 48).
    for i in range(7, 48):
        data["lag_{}".format(i)] = data.y.shift(i)

    # Shifting leaves NaNs in the leading rows; keep only complete rows.
    complete = data.dropna()
    y = complete.y
    X = complete.drop(['y'], axis=1)

    # Reserve 30% of data for testing
    X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)

    # Scaling: fit on the training split only, then apply to the test split.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # NOTE(review): every MAPE call below passes the predictions first and the
    # targets second. The helper is defined outside this block -- confirm it
    # does not expect (y_true, y_pred), because MAPE is not symmetric.

    # Linear Regression
    pred_linear = _fit_and_predict(LinearRegression(), X_train_scaled, y_train, X_test_scaled)
    error_linear = mean_absolute_percentage_error(pred_linear, y_test)

    # Ridge
    pred_ridge = _fit_and_predict(RidgeCV(cv=tscv), X_train_scaled, y_train, X_test_scaled)
    error_ridge = mean_absolute_percentage_error(pred_ridge, y_test)

    # Lasso
    pred_lasso = _fit_and_predict(LassoCV(cv=tscv), X_train_scaled, y_train, X_test_scaled)
    error_lasso = mean_absolute_percentage_error(pred_lasso, y_test)

    # XGB
    # NOTE(review): newer XGBoost releases rename 'reg:linear' to
    # 'reg:squarederror' and deprecate `silent`/`nthread`/`seed` -- confirm
    # against the installed version before changing.
    xgb = XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                       colsample_bynode=1, colsample_bytree=0.3, gamma=0,
                       importance_type='gain', learning_rate=0.1, max_delta_step=0,
                       max_depth=4, min_child_weight=1, missing=None, n_estimators=100,
                       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
                       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
                       silent=None, subsample=0.5, verbosity=1)
    pred_xgb = _fit_and_predict(xgb, X_train_scaled, y_train, X_test_scaled)
    error_xgb = mean_absolute_percentage_error(pred_xgb, y_test)

    # LightGBM (native training API, so it does not go through the helper).
    lgb_train = lgb.Dataset(X_train_scaled, y_train)
    lgb_eval = lgb.Dataset(X_test_scaled, y_test, reference=lgb_train)
    # NOTE(review): 'n_estimators' here and num_boost_round=10 below both look
    # like they set the number of boosting rounds -- verify which one wins.
    lightgbm_params = {'boosting_type': 'gbdt',
                       'colsample_bytree': 0.65,
                       'learning_rate': 0.001,
                       'n_estimators': 20,
                       'num_leaves': 3,
                       'reg_alpha': 0.5,
                       'reg_lambda': 0.5,
                       'subsample': 0.7}
    gbm = lgb.train(lightgbm_params, lgb_train, num_boost_round=10, valid_sets=lgb_eval)
    pred_lightgbm = gbm.predict(X_test_scaled)
    error_lightgbm = mean_absolute_percentage_error(pred_lightgbm, y_test)

    # Stacking: fixed-weight blend of the five predictions (weights sum to 1).
    blended = (pred_linear * 0.1 + pred_ridge * 0.075 + pred_lasso * 0.075
               + pred_xgb * 0.35 + pred_lightgbm * 0.4)
    stack_error = mean_absolute_percentage_error(blended, y_test)

    # Summary row: the five model errors, the blend, then their mean and minimum
    # (mean/min are taken over the five base models only, not the blend).
    base_errors = [error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm]
    l1 = base_errors + [stack_error,
                        sum(base_errors) / len(base_errors),
                        min(base_errors)]

    df_final = pd.DataFrame({Account: l1}).T
    df_final.columns = ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm',
                        'stack_error', 'avg_error', 'min_error']
    return df_final
def getMetics(Accountlist):
    """Run ``modelingFor5`` for every account and stack the resulting rows.

    Parameters
    ----------
    Accountlist
        Iterable of account identifiers; each one is passed to
        ``modelingFor5`` in order.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ``modelingFor5`` concatenated row-wise, in the
        order of ``Accountlist``. An empty DataFrame when ``Accountlist`` is
        empty.
    """
    # The first account is taken from Accountlist itself; `l1` is not defined
    # in this function, so referring to it here would raise NameError.
    frames = [modelingFor5(account) for account in Accountlist]
    if not frames:
        # pd.concat raises on an empty list, so return an empty frame instead.
        return pd.DataFrame()
    # One concat instead of repeated DataFrame.append, which copies on every
    # call and is no longer available in pandas 2.x.
    return pd.concat(frames)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment