@halegreen · Created November 17, 2017 02:40
A two-level stacking ensemble. Level 1 uses six models (XGBoost, LightGBM, RandomForest, ExtraTrees, DecisionTree, and AdaBoost), and Level 2 uses LinearRegression to fit the Level-1 results. Each base model contributes a column of out-of-fold predictions as the stacker's training features, so the second level only ever sees predictions for rows the base model was not trained on.
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import (AdaBoostRegressor, ExtraTreesRegressor,
                              RandomForestRegressor)
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor


class Ensemble(object):
    def __init__(self, n_splits, stacker, base_models):
        self.n_splits = n_splits
        self.stacker = stacker
        self.base_models = base_models

    def fit_predict(self, X, y, T):
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        folds = list(KFold(n_splits=self.n_splits, shuffle=True,
                           random_state=2016).split(X, y))

        # Out-of-fold predictions on the train set (one column per base model)
        # and fold-averaged predictions on the test set.
        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))

        for i, clf in enumerate(self.base_models):
            S_test_i = np.zeros((T.shape[0], self.n_splits))
            for j, (train_idx, test_idx) in enumerate(folds):
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                print("Fit model %d fold %d" % (i, j))
                clf.fit(X_train, y_train)
                # Held-out fold predictions become the stacker's features.
                S_train[test_idx, i] = clf.predict(X_holdout)
                S_test_i[:, j] = clf.predict(T)
            # Average per-fold test predictions to smooth fold-to-fold variance.
            S_test[:, i] = S_test_i.mean(axis=1)

        # Optional sanity check of the stacker on the out-of-fold features:
        # results = cross_val_score(self.stacker, S_train, y, cv=5, scoring='r2')
        # print("Stacker score: %.4f (%.4f)" % (results.mean(), results.std()))

        self.stacker.fit(S_train, y)
        return self.stacker.predict(S_test)
# RandomForest params
rf_params = {
    'n_estimators': 50,
    'max_depth': 8,
    'min_samples_split': 100,
    'min_samples_leaf': 30,
}

# XGBoost params
xgb_params = {
    'n_estimators': 50,
    'min_child_weight': 12,
    'learning_rate': 0.27,
    'max_depth': 6,
    'subsample': 0.77,
    'reg_lambda': 0.8,
    'reg_alpha': 0.4,
    'base_score': 0,
    # 'seed': 400,
    'silent': 1,
}

# LightGBM params (comments give the canonical names for the aliases used)
lgb_params = {
    'n_estimators': 50,
    'max_bin': 10,
    'learning_rate': 0.321,    # shrinkage_rate
    'metric': 'l1',            # alias for 'mae'
    'sub_feature': 0.34,       # feature_fraction
    'bagging_fraction': 0.85,  # sub_row
    'bagging_freq': 40,
    'num_leaves': 512,         # num_leaf
    'min_data': 500,           # min_data_in_leaf
    'min_hessian': 0.05,       # min_sum_hessian_in_leaf
    'verbose': 0,
    'feature_fraction_seed': 2,
    'bagging_seed': 3,
}
# XGBoost model
xgb_model = XGBRegressor(**xgb_params)

# LightGBM model
lgb_model = LGBMRegressor(**lgb_params)

# RandomForest model
rf_model = RandomForestRegressor(**rf_params)

# ExtraTrees model
et_model = ExtraTreesRegressor()

# SVR model: too slow once the training set exceeds roughly 10,000 rows, so it is left out
# svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.05)

# DecisionTree model
dt_model = DecisionTreeRegressor()

# AdaBoost model
ada_model = AdaBoostRegressor()
stack = Ensemble(n_splits=5,
                 stacker=LinearRegression(),
                 base_models=(rf_model, xgb_model, lgb_model,
                              et_model, ada_model, dt_model))

y_test = stack.fit_predict(x_train, y_train, x_test)
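The gist assumes x_train, y_train, and x_test already exist from earlier preprocessing. As a quick end-to-end check, here is a minimal sketch on synthetic data; make_regression and the variable names below are illustrative additions, not part of the original:

# Hypothetical smoke test (not in the original gist): build synthetic
# regression data and run the stack end to end.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X_all, y_all = make_regression(n_samples=2000, n_features=20,
                               noise=10.0, random_state=0)
x_train, x_test, y_train, y_true = train_test_split(
    X_all, y_all, test_size=0.25, random_state=0)

y_pred = stack.fit_predict(x_train, y_train, x_test)
print("Held-out MAE: %.4f" % np.mean(np.abs(y_pred - y_true)))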
Author: 王十二的
Link: http://www.jianshu.com/p/b2f545e5539b
Source: 简书 (Jianshu)
Copyright belongs to the author. For commercial reproduction, please contact the author for authorization; for non-commercial reproduction, please credit the source.