PranjalDureja0002’s gists

PranjalDureja0002 / model

Created March 2, 2021 16:12

model

	from sklearn.metrics import roc_curve, auc

	# Rebuild the decision tree from the best parameters stored on dt_grid,
	# then fit it on the training split.
	tuned_params = dt_grid.best_params_
	best_depth = tuned_params['max_depth']
	best_samples = tuned_params['min_samples_split']
	dt_1 = DecisionTreeClassifier(
	    max_depth=best_depth,
	    min_samples_split=best_samples,
	)
	dt_1.fit(X_train, y_train)

	# roc_auc_score(y_true, y_score) expects probability estimates of the
	# positive class as its second argument, not hard class predictions.
	# NOTE(review): pred_func is defined elsewhere; presumably it returns such
	# scores for X_train - confirm.
	y_train_pred = pred_func(dt_1, X_train)

PranjalDureja0002 / model

Created March 2, 2021 16:15

model

	+---------------+-----------------+----------------+----------------+---------------+
	|     Model     | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
	+---------------+-----------------+----------------+----------------+---------------+
	| Decision_Tree |     0.9967      |     0.9909     |    0.99314     |    0.9771     |
	+---------------+-----------------+----------------+----------------+---------------+

PranjalDureja0002 / model

Created March 2, 2021 16:19

model

	# Hyper-parameter grid searched for the XGBoost classifier.
	parameters = {
	    'n_estimators': [100, 500, 1000],
	    'learning_rate': [0.1, 0.01, 0.05],
	}

	# `silent` is no longer an XGBoost parameter (passing it only triggers an
	# "unused parameter" warning); `verbosity=0` is the supported way to
	# suppress the library's log output.
	xgb = XGBClassifier(objective='binary:logistic',
	                    verbosity=0, nthread=4)

	# 3-fold grid search over `parameters`, scored on macro-averaged F1.
	# Train-fold scores are retained alongside the validation scores.
	xg_grid = GridSearchCV(
	    xgb,
	    param_grid=parameters,
	    n_jobs=-1,
	    verbose=1,
	    scoring='f1_macro',
	    cv=3,
	    return_train_score=True,
	)

PranjalDureja0002 / model

Created March 2, 2021 16:20

model

	training score: 0.9998006310038143
	testing score: 0.990791808142236

PranjalDureja0002 / model

Created March 2, 2021 16:21

model

	# Per-feature importance of type 'weight', read from the booster behind xg1.
	booster = xg1.get_booster()
	feature_important = booster.get_score(importance_type='weight')
	keys = list(feature_important)
	values = [feature_important[name] for name in keys]

	# One row per feature, highest score first.
	score_frame = pd.DataFrame(data=values, index=keys, columns=["score"])
	data = score_frame.sort_values(by="score", ascending=False)
	data.head(20)

PranjalDureja0002 / model

Created March 2, 2021 16:22

model

PranjalDureja0002 / model

Created March 2, 2021 16:23

model

	# Sum of the scores of ALL features in `data`; used below as the
	# denominator for the percentage labels.
	s1 = data['score']
	s_s1 = sum(s1.tolist())
	s_s1

	# Horizontal bar chart of the first 20 rows of `data`.
	plt.style.use('fivethirtyeight')
	ax = data.head(20).plot(kind='barh', color='red')
	for p in ax.patches:
	    # Each bar's width is that feature's score, so this is its share of
	    # the total over every feature (not only the 20 that are plotted).
	    percentage = '{:.1f}%'.format(100 * p.get_width() / s_s1)
	    # Candidate label position: just inside the bar's right end, at the
	    # bar's upper edge.
	    x = p.get_x() + p.get_width() - 0.5
	    y = p.get_y() + p.get_height()
	    # NOTE(review): the snippet appears to be cut off here; percentage, x
	    # and y are presumably passed to an annotation call that is not visible.

PranjalDureja0002 / model

Created March 2, 2021 16:25

model

	+---------+-----------------+----------------+----------------+---------------+
	|  Model  | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
	+---------+-----------------+----------------+----------------+---------------+
	| XgBoost |     0.99938     |    0.99855     |     0.9998     |   0.990791    |
	+---------+-----------------+----------------+----------------+---------------+

PranjalDureja0002 / model

Created March 2, 2021 16:27

model

	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import roc_auc_score

	# Base learners: two XGBoost models, a decision tree built from
	# best_depth / best_samples, and a logistic regression.
	# NOTE(review): x_1 and x_2 are configured identically - confirm that the
	# duplicate is intentional.
	x_1 = XGBClassifier(n_estimators=500, nthread=-1)
	x_2 = XGBClassifier(n_estimators=500, nthread=-1)
	x_3 = DecisionTreeClassifier(
	    max_depth=best_depth,
	    min_samples_split=best_samples,
	    class_weight='balanced',
	)
	x_4 = LogisticRegression(class_weight='balanced')
	base_learners = [x_1, x_2, x_3, x_4]

	# Meta-classifier: an XGBoost model with 1000 estimators.
	x_cfl = XGBClassifier(n_estimators=1000, nthread=-1)

	s_clf = StackingClassifier(classifiers=base_learners, meta_classifier=x_cfl)
	s_clf.fit(X_train, y_train)

PranjalDureja0002 / model

Created March 2, 2021 16:27

model

	+---------------------+-----------------+----------------+----------------+---------------+
	|        Model        | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
	+---------------------+-----------------+----------------+----------------+---------------+
	| Stacking_Classifier |     0.99537     |     0.9902     |    0.99429     |    0.98759    |
	+---------------------+-----------------+----------------+----------------+---------------+