Created
August 7, 2015 08:03
-
-
Save vaclavcadek/d29bfd605c90fa91fe7f to your computer and use it in GitHub Desktop.
Feature selection with cross-validation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Recursive feature elimination with cross-validation (RFECV).

Fits a linear SVM inside RFECV on a binary classification problem, prints
the number of features RFECV selects, and plots the mean cross-validated
ROC AUC against the number of features kept.
"""
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC

try:
    # Current scikit-learn: splitters live in ``model_selection`` and are
    # configured with ``n_splits``; the labels are supplied later by fit().
    from sklearn.model_selection import StratifiedKFold
    _LEGACY_CV_API = False
except ImportError:
    # Old scikit-learn releases that only ship ``cross_validation``, where
    # the splitter is built directly from the labels.
    from sklearn.cross_validation import StratifiedKFold
    _LEGACY_CV_API = True

# X and y were referenced but never defined. Build a reproducible synthetic
# binary problem so the script runs on its own ('roc_auc' scoring needs two
# classes). NOTE(review): replace this with the real dataset if one was
# intended.
X, y = make_classification(n_samples=1000, n_features=25, n_informative=3,
                           n_redundant=2, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1, random_state=0)

# Create the RFE object and compute a cross-validated score.
svc = SVC(kernel='linear')
if _LEGACY_CV_API:
    cv = StratifiedKFold(y, 2)
else:
    cv = StratifiedKFold(n_splits=2)

# Features are dropped one at a time (step=1); each candidate subset is
# scored by the area under the ROC curve, averaged over the two folds.
rfecv = RFECV(estimator=svc, step=1, cv=cv, scoring='roc_auc')
rfecv.fit(X, y)
print('Optimal number of features : %d' % rfecv.n_features_)

# ``grid_scores_`` is gone from newer scikit-learn; the per-subset mean
# score is exposed through ``cv_results_`` instead. Prefer the new
# attribute and fall back to the old one.
if hasattr(rfecv, 'cv_results_'):
    mean_scores = rfecv.cv_results_['mean_test_score']
else:
    mean_scores = rfecv.grid_scores_

# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel('Number of features selected')
plt.ylabel('Cross validation score (ROC AUC)')
# With step=1 the i-th score corresponds to i features kept (1-based).
plt.plot(range(1, len(mean_scores) + 1), mean_scores)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment