Skip to content

Instantly share code, notes, and snippets.

@im-noob
Last active October 16, 2021 08:55
Show Gist options
  • Save im-noob/2696f20d6df51fdb6091dfcd283ed07f to your computer and use it in GitHub Desktop.
Save im-noob/2696f20d6df51fdb6091dfcd283ed07f to your computer and use it in GitHub Desktop.
Find Best Model ML
# Linear Regression — candidate regressors to compare, keyed by display name.
models = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(alpha=1.0),
    'Lasso': Lasso(),
    'DecisionTreeRegressor': DecisionTreeRegressor(),
    'KNeighborsRegressor': KNeighborsRegressor(),
    'SVR': SVR(),
}
def build_model(model_obj):
    """Fit one regressor, print hold-out and cross-validation metrics.

    Parameters
    ----------
    model_obj : tuple
        A ``(name, unfitted_regressor)`` pair, e.g. one item of ``models``.

    Returns
    -------
    tuple
        ``(name, r2, mean_cv, std_cv, abs(r2 - mean_cv), fitted_model)``,
        or a 6-tuple of ``None`` when the model is skipped.

    Relies on module-level ``X``, ``y``, ``X_train``, ``X_test``,
    ``y_train``, ``y_test`` being defined before the call.
    """
    name, model = model_obj
    # SVR does not support multi-output targets, so skip it in that case.
    # Check ndim first: a 1-D target would make shape[1] raise IndexError.
    if name == 'SVR' and y_train.ndim > 1 and y_train.shape[1] > 1:
        return (None, None, None, None, None, None)
    print('----------------------------------')
    print('*****', name, '******')
    print('----------------------------------')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mean_absolute_error_ = mean_absolute_error(y_test, y_pred)
    mean_squared_error_ = mean_squared_error(y_test, y_pred)
    r2_score_ = r2_score(y_test, y_pred)
    # 3-fold CV on the full data set; the gap between the hold-out r2 and
    # the CV mean is used later to gauge over/under-fitting.
    kFold = KFold(n_splits=3, shuffle=True, random_state=42)
    scores = cross_val_score(model, X, y, cv=kFold, scoring='r2', n_jobs=-1)
    mean_cv_scores = np.mean(scores)
    std_cv_scores = np.std(scores)
    diff_acc_score_cv_score = np.abs(r2_score_ - mean_cv_scores)
    print('mean_absolute_error: ', mean_absolute_error_)
    print('mean_squared_error: ', mean_squared_error_)
    print('r2_score: ', r2_score_)
    print('Cross Val Score: ', mean_cv_scores)
    print('Cross Val std: ', std_cv_scores)
    print('Diff Between score and CV score: ', diff_acc_score_cv_score)
    print('__________________________________________\n' * 2)
    # Bug fix: return the computed value r2_score_, not the r2_score function.
    return (name, r2_score_, mean_cv_scores, std_cv_scores, diff_acc_score_cv_score, model)
# Hold-out split (default 75/25); fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, shuffle=True)

final_model_list = []
for one_model in models.items():
    name, score, mean_cv_scores, std_cv_scores, diff_acc_score_cv_score, model = build_model(one_model)
    if name is not None:  # skipped models return an all-None tuple
        final_model_list.append({
            'name': name,
            'score': score,
            'mean_cv_scores': mean_cv_scores,
            'std_cv_scores': std_cv_scores,
            'diff_acc_score_cv_score': diff_acc_score_cv_score,
            'model': model,
        })

# The best model is the one whose hold-out score is closest to its CV
# score (smallest gap). min() finds it in one pass instead of sorting twice.
_best = min(final_model_list, key=lambda item: item['diff_acc_score_cv_score'])
_best['name']
# NOTE(review): variable name kept for compatibility with downstream code,
# but the winner is not necessarily a Lasso model.
lasso = _best['model']
# ----------------------------------------------------------------------------------------
# Candidate classifiers to compare, keyed by display name.
# probability=True lets SVC expose predict_proba for the ROC curve below.
models = {
    'logisticRegression': LogisticRegression(),
    'decisionTreeClassifier': DecisionTreeClassifier(),
    'svc': SVC(probability=True),
    'kNeighborsClassifier': KNeighborsClassifier(),
}
def build_model(model_obj):
    """Fit one classifier, print hold-out and cross-validation metrics.

    For binary problems an ROC curve is plotted and AUC is reported;
    the check is skipped for multi-class targets.

    Parameters
    ----------
    model_obj : tuple
        A ``(name, unfitted_classifier)`` pair, e.g. one item of ``models``.

    Returns
    -------
    tuple
        ``(name, accuracy, mean_cv, std_cv, abs(accuracy - mean_cv),
        fitted_model)``.

    Relies on module-level ``X``, ``y``, ``X_train``, ``X_test``,
    ``y_train``, ``y_test`` being defined before the call.
    """
    name, model = model_obj
    print('----------------------------------')
    print('*****', name, '******')
    print('----------------------------------')
    # More than two distinct labels => multi-class; skip the ROC/AUC block.
    # (value_counts assumes y_train is a pandas Series — TODO confirm.)
    multi_class = len(y_train.value_counts()) > 2
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Renamed locals: the originals shadowed metrics.accuracy_score /
    # metrics.confusion_matrix, which is fragile if the names are ever
    # used unqualified later in the module.
    acc_score = metrics.accuracy_score(y_test, y_pred)
    conf_matrix = metrics.confusion_matrix(y_test, y_pred)
    if not multi_class:
        # Probability of the positive class (column 1) for the ROC curve.
        y_pred_proba = model.predict_proba(X_test)[::, 1]
        fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
        auc = metrics.roc_auc_score(y_test, y_pred_proba)
        plt.figure(figsize=(5, 5))
        plt.plot(fpr, tpr)
        plt.title('AUC ROC Curve for ' + name)
        print('auc: ', auc)
        plt.show()
    # NOTE(review): plain KFold on a classification target can produce
    # imbalanced folds — consider StratifiedKFold; kept as-is to preserve
    # the original behavior.
    kFold = KFold(n_splits=3, shuffle=True, random_state=42)
    scores = cross_val_score(model, X, y, cv=kFold, scoring='accuracy', n_jobs=-1)
    mean_cv_scores = np.mean(scores)
    std_cv_scores = np.std(scores)
    diff_acc_score_cv_score = np.abs(acc_score - mean_cv_scores)
    # Typo fix in the report label ("Confussion" -> "Confusion").
    print('Confusion matrix: \n', conf_matrix)
    print('\nscore: ', acc_score)
    print('Cross Val Score: ', mean_cv_scores)
    print('Cross Val std: ', std_cv_scores)
    print('Diff Between score and CV score: ', diff_acc_score_cv_score)
    print('__________________________________________\n' * 2)
    return (name, acc_score, mean_cv_scores, std_cv_scores, diff_acc_score_cv_score, model)
# Hold-out split (default 75/25); fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, shuffle=True)

final_model_list = []
for one_model in models.items():
    name, accuracy_score, mean_cv_scores, std_cv_scores, diff_acc_score_cv_score, model = build_model(one_model)
    final_model_list.append({
        'name': name,
        'accuracy_score': accuracy_score,
        'mean_cv_scores': mean_cv_scores,
        'std_cv_scores': std_cv_scores,
        'diff_acc_score_cv_score': diff_acc_score_cv_score,
        'model': model,
    })

# The best model is the one whose hold-out accuracy is closest to its CV
# score (smallest gap). min() finds it in one pass instead of sorting twice.
_best = min(final_model_list, key=lambda item: item['diff_acc_score_cv_score'])
_best['name']
# NOTE(review): variable name kept for compatibility with downstream code,
# but this holds the best *classifier*, not a Lasso model.
lasso = _best['model']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment