Skip to content

Instantly share code, notes, and snippets.

@qharlie
Created February 24, 2017 20:32
Show Gist options
  • Save qharlie/8106b1d350b422833fe7af219b1de76b to your computer and use it in GitHub Desktop.
Save qharlie/8106b1d350b422833fe7af219b1de76b to your computer and use it in GitHub Desktop.
# Tune a StandardScaler -> PCA -> SelectAtMostKBest -> DecisionTree pipeline
# with an exhaustive grid search, then report the cross-validated and
# held-out scores.

# Hold out 30% of the samples; the search below only ever sees the rest.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier(random_state=RANDOM_STATE)

# Keys use make_pipeline's automatic step names (the lowercased class name)
# followed by "__<parameter>".
param_grid = [
    {
        "pca__n_components": range(1, len(features_list) - 1, 1),
        "selectatmostkbest__k": [2, 3, 4, 5, 6, 'all'],
        "decisiontreeclassifier__criterion": ['gini', 'entropy'],
        "decisiontreeclassifier__splitter": ['best', 'random'],
        "decisiontreeclassifier__presort": [True, False],
    }
]

# The n_components / k values given here are only starting points; the grid
# search overrides them for every candidate it evaluates.
pipe = make_pipeline(
    StandardScaler(),
    PCA(n_components=len(features_list) - 1),
    SelectAtMostKBest(k=len(features_list) - 1),
    clf,
)

# Seed the splitter so the CV folds (and therefore the selected parameters)
# are reproducible from run to run.
cv = StratifiedShuffleSplit(random_state=RANDOM_STATE)

# Search over the grid defined above (`param_grid`).
search = GridSearchCV(pipe, param_grid, cv=cv, scoring=score, n_jobs=-1)
search.fit(features_train, labels_train)

# Keep the held-out score so it can be reported instead of being discarded.
test_score = search.score(features_test, labels_test)

print("BEST SCORE = {}, CLF = {}".format(str(search.best_score_), clf))
print("TEST SCORE = {}".format(str(test_score)))
print("BEST PARAMS = " + str(search.best_params_))
print("BEST ESTIMATOR = " + str(search.best_estimator_))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment