#assumed setup: base regression model definitions and the age_yrs split used below
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
linear_reg = LinearRegression()
knn_reg = KNeighborsRegressor()
svr_reg = SVR()
X_train, X_val, y_train, y_val = get_split_data(german_cred, target_name='age_yrs')
#fit base models
linear_reg.fit(X_train, y_train)
knn_reg.fit(X_train, y_train)
svr_reg.fit(X_train, y_train)
#make predictions with trained models
pred1 = linear_reg.predict(X_val)
pred2 = knn_reg.predict(X_val)
pred3 = svr_reg.predict(X_val)
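These three base predictions are typically combined by simple averaging; a minimal sketch (np is NumPy, and get_mae is the error helper used later in this gist):
import numpy as np
#average the three base predictions into one ensemble prediction
avg_pred = np.mean([pred1, pred2, pred3], axis=0)
print("MAE of averaged predictions is : ", get_mae(avg_pred, y_val))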
#assumed definitions for the single classifiers used below
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
log_cf = LogisticRegression()
knn_cf = KNeighborsClassifier()
svc_cf = SVC()
#get the data sets
X_train, X_val, y_train, y_val = get_split_data(german_cred, target_name='bad_credit')
#fit single models
log_cf.fit(X_train, y_train)
knn_cf.fit(X_train, y_train)
svc_cf.fit(X_train, y_train)
#make predictions with trained models
pred1 = log_cf.predict(X_val)
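The snippet truncates after the first prediction; max voting needs all three, combined by taking the most frequent class label per sample. A minimal sketch (get_acc is the accuracy helper used later in this gist):
import numpy as np
from scipy import stats
pred2 = knn_cf.predict(X_val)
pred3 = svc_cf.predict(X_val)
#max voting: take the per-sample mode of the three class predictions
final_pred = stats.mode(np.stack([pred1, pred2, pred3]), axis=0).mode.ravel()
print("Max Voting (manual)")
print(get_acc(final_pred, y_val))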
#Import the module
from sklearn.ensemble import VotingClassifier
#Pass the classifiers as a list of tuples with model names and the models themselves
max_model = VotingClassifier(estimators=[('logistic_reg', log_cf), ('KNN Classifier', knn_cf), ("SVC", svc_cf)], voting='hard')
max_model.fit(X_train, y_train)
print("Max Voting in sklearn")
print(get_acc(max_model.predict(X_val), y_val))
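With voting='hard' the ensemble takes the majority class label; a 'soft' variant averages predicted probabilities instead, which requires every estimator to support predict_proba (for SVC that means probability=True). A minimal sketch:
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
#soft voting averages class probabilities instead of counting labels
soft_model = VotingClassifier(
    estimators=[('logistic_reg', log_cf), ('KNN Classifier', knn_cf),
                ("SVC", SVC(probability=True))],
    voting='soft')
soft_model.fit(X_train, y_train)
print("Soft Voting in sklearn")
print(get_acc(soft_model.predict(X_val), y_val))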
#Bagging and Boosting models for both classification and regression problems
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, BaggingRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
#import xgboost as xgb
#bagging algorithms for regression
rand_forest_reg = RandomForestRegressor(n_estimators=100, random_state=rand_seed)
extra_tree_reg = ExtraTreesRegressor(n_estimators=100, random_state=rand_seed)
bagging_meta_reg = BaggingRegressor(n_estimators=100, random_state=rand_seed)
#get data for regression task
X_train, X_val, y_train, y_val = get_split_data(german_cred, target_name='age_yrs')
#Train and fit these models
rand_forest_reg.fit(X_train, y_train)
extra_tree_reg.fit(X_train, y_train)
bagging_meta_reg.fit(X_train, y_train)
#check their performance
print("MAE of Random Forest is : ", get_mae(rand_forest_reg.predict(X_val), y_val))
print("MAE of Extra Trees is : ", get_mae(extra_tree_reg.predict(X_val), y_val))
print("MAE of Bagging is : ", get_mae(bagging_meta_reg.predict(X_val), y_val))
#Import boosting regression algorithms
# from xgboost import XGBRegressor as xgb_reg
# from lightgbm import LGBMRegressor as lgb_reg
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor
#Import boosting classification algorithms
# from xgboost import XGBClassifier as xgb_cf
# from lightgbm import LGBMClassifier as lgb_cf
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
#get data for regression task
X_train, X_val, y_train, y_val = get_split_data(german_cred, target_name='age_yrs')
#instantiate, then train and fit these models
ada_reg = AdaBoostRegressor(n_estimators=100, random_state=rand_seed)
gb_reg = GradientBoostingRegressor(n_estimators=100, random_state=rand_seed)
ada_reg.fit(X_train, y_train)
gb_reg.fit(X_train, y_train)
#check their performance
print("MAE of AdaBoost is : ", get_mae(ada_reg.predict(X_val), y_val))
print("MAE of Gradient Boosting is : ", get_mae(gb_reg.predict(X_val), y_val))
from sklearn.model_selection import KFold
def stackingModel(base_models, meta_model, features, target, nfolds=10):
    #Split data into folds
    kfold = KFold(n_splits=nfolds, shuffle=True, random_state=rand_seed)
    #initialize array to hold out-of-fold predictions from the base models
    train_predictions = np.zeros((features.shape[0], len(base_models)))
    # Train base models: fit on each training fold and predict on the held-out
    # fold, so the meta model only ever sees out-of-fold predictions
    features, target = np.asarray(features), np.asarray(target)
    for i, model in enumerate(base_models):
        for train_idx, holdout_idx in kfold.split(features):
            model.fit(features[train_idx], target[train_idx])
            train_predictions[holdout_idx, i] = model.predict(features[holdout_idx])
    #fit the meta model on the stacked out-of-fold predictions
    meta_model.fit(train_predictions, target)
    return meta_model
#get data for regression task
target = german_cred['age_yrs']
data = german_cred.drop('age_yrs', axis=1)
data = standardize_data(data)
#first level learners
base_learners = [linear_reg, svr_reg, knn_reg]
#meta learner
meta_ln = svr_reg
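The snippet ends before the stacked model is actually trained; a minimal usage sketch (clone avoids refitting svr_reg in place, since it doubles as a base learner; the in-sample MAE here only sanity-checks the pipeline, not generalization):
from sklearn.base import clone
#train the stacked ensemble on the standardized data
stacked_meta = stackingModel(base_learners, clone(meta_ln), data, target)
#to predict, stack the base model predictions the same way
base_preds = np.column_stack([m.predict(data) for m in base_learners])
print("MAE of stacked model is : ", get_mae(stacked_meta.predict(base_preds), target))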