Skip to content

Instantly share code, notes, and snippets.

@yuyasugano
Created October 30, 2019 11:32
Show Gist options
  • Save yuyasugano/72a093c92549a42303ad59b86888d356 to your computer and use it in GitHub Desktop.
Save yuyasugano/72a093c92549a42303ad59b86888d356 to your computer and use it in GitHub Desktop.
Univariate Statistics
# Univariate Statistics
# Keep the top 25% of features as ranked by SelectPercentile's default
# univariate score function (no score_func is passed here).
from sklearn.feature_selection import SelectPercentile

select = SelectPercentile(percentile=25)

# Fit the selector on the training split only, reducing it in the same call;
# the test split is then reduced with the already-fitted selector.
X_train_selected = select.fit_transform(
    X_train_full,
    y_train_full.values.ravel(),
)
X_test_selected = select.transform(X_test_full)

# One entry per input feature: True where the feature was kept.
mask = select.get_support()
print(mask)

# Draw the mask as a single-row image so kept/dropped features are visible.
plt.matshow(mask.reshape(1, -1), cmap='gray_r')
plt.xlabel("Technical Indexes")
# GradientBoost Classifier
# Baseline: standardise the features, then fit gradient boosting on the full
# (unselected) feature matrix and report train/test accuracy and micro F1.
print('--------------------------Without Univariate Statistics-------------------------------------')
pipe_gb = Pipeline([('scl', StandardScaler()), ('est', GradientBoostingClassifier(random_state=39))])

# Flatten the label frames once instead of on every metric call.
y_train_true = y_train_full.values.ravel()
y_test_true = y_test_full.values.ravel()
pipe_gb.fit(X_train_full, y_train_true)

# Predict once per split and reuse the result for both metrics; previously
# predict() ran twice on each split for no benefit.
gb_train_pred = pipe_gb.predict(X_train_full)
gb_test_pred = pipe_gb.predict(X_test_full)

print('Train Accuracy: {:.3f}'.format(accuracy_score(y_train_true, gb_train_pred)))
print('Test Accuracy: {:.3f}'.format(accuracy_score(y_test_true, gb_test_pred)))
# NOTE(review): for single-label targets, micro-averaged F1 is numerically the
# same as accuracy; confirm whether 'macro' or 'weighted' was intended.
print('Train F1 Score: {:.3f}'.format(f1_score(y_train_true, gb_train_pred, average='micro')))
print('Test F1 Score: {:.3f}'.format(f1_score(y_test_true, gb_test_pred, average='micro')))
# GradientBoost Classifier with Univariate Statistics
# Same scaler + gradient boosting pipeline, but trained and evaluated on the
# reduced feature matrices X_train_selected / X_test_selected.
print('---------------------------With Univariate Statistics--------------------------------------')
pipe_gb_percentile = Pipeline([('scl', StandardScaler()), ('est', GradientBoostingClassifier(random_state=39))])

# Flatten the label frames once instead of on every metric call.
y_train_true = y_train_full.values.ravel()
y_test_true = y_test_full.values.ravel()
pipe_gb_percentile.fit(X_train_selected, y_train_true)

# Predict once per split and reuse the result for both metrics; previously
# predict() ran twice on each split for no benefit.
gb_percentile_train_pred = pipe_gb_percentile.predict(X_train_selected)
gb_percentile_test_pred = pipe_gb_percentile.predict(X_test_selected)

print('Train Accuracy: {:.3f}'.format(accuracy_score(y_train_true, gb_percentile_train_pred)))
print('Test Accuracy: {:.3f}'.format(accuracy_score(y_test_true, gb_percentile_test_pred)))
# NOTE(review): for single-label targets, micro-averaged F1 is numerically the
# same as accuracy; confirm whether 'macro' or 'weighted' was intended.
print('Train F1 Score: {:.3f}'.format(f1_score(y_train_true, gb_percentile_train_pred, average='micro')))
print('Test F1 Score: {:.3f}'.format(f1_score(y_test_true, gb_percentile_test_pred, average='micro')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment