Skip to content

Instantly share code, notes, and snippets.

@vaclavcadek
Created August 7, 2015 08:03
Show Gist options
  • Save vaclavcadek/d29bfd605c90fa91fe7f to your computer and use it in GitHub Desktop.
Save vaclavcadek/d29bfd605c90fa91fe7f to your computer and use it in GitHub Desktop.
Feature selection with cross-validation
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.datasets import make_classification
# Build a synthetic two-class dataset so the script runs on its own.
# Replace X and y with your own feature matrix and labels as needed.
# The target is kept binary because the selection below is scored with
# 'roc_auc'.
X, y = make_classification(n_samples=1000, n_features=25, n_informative=3,
                           n_redundant=2, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1, random_state=0)

# Create the RFE object and compute a cross-validated score.
svc = SVC(kernel='linear')
# Candidate feature subsets are scored with ROC AUC (not accuracy),
# using two stratified folds built from y; step=1 removes one feature
# per elimination round.
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 2), scoring='roc_auc')
rfecv.fit(X, y)

print('Optimal number of features : %d' % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel('Number of features selected')
# The label names the metric actually passed as `scoring` above.
plt.ylabel('Cross validation score (ROC AUC)')
# With step=1, the i-th entry of grid_scores_ is plotted against i + 1
# selected features.
plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment