Skip to content

Instantly share code, notes, and snippets.

@coela
Created March 24, 2015 04:18
Show Gist options
  • Save coela/ef4f9aab7f7d4176cb25 to your computer and use it in GitHub Desktop.
Save coela/ef4f9aab7f7d4176cb25 to your computer and use it in GitHub Desktop.
#-*- coding:utf-8 -*-
import sys
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import cross_val_score
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
argvs = sys.argv

# Alternative data source (disabled): read a space-delimited file whose path is
# the first command-line argument, replace NaNs, and take column 0 as the label
# and the remaining columns as the features.
#train = np.genfromtxt(open(argvs[1],'r'), delimiter = " ")
#train = np.nan_to_num(train)
#print(train)
#training_data = train[:, 1:]
#training_label = train[:, 0]

# Train on the iris dataset bundled with scikit-learn.
from sklearn.datasets import load_iris
iris = load_iris()
training_data = iris.data
training_label = iris.target
print(training_label)

# min_samples_split must be an int >= 2 in current scikit-learn releases; a
# value of 1 raises ValueError there. A node holding a single sample can never
# be split, so 2 yields the same trees on releases that accepted 1.
# NOTE(review): n_jobs=16 is hard-coded for the original author's machine.
forest = ExtraTreesClassifier(n_estimators=1000,
                              random_state=0,
                              n_jobs=16,
                              max_depth=None,
                              min_samples_split=2)
clf = forest.fit(training_data, training_label)

importances = forest.feature_importances_
# Spread of each feature's importance across the individual trees. It is
# computed here but not included in the report printed below.
std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]
feature_num = training_data.shape[1]

print("Feature ranking:")
for rank, feature_index in enumerate(indices, 1):
    print("%d. feature %d (%f)" % (rank, feature_index, importances[feature_index]))

# Cross-validate the same estimator configuration on the full dataset using
# the library's default splitting strategy, then report per-fold and mean score.
scores = cross_val_score(clf, training_data, training_label)
print(scores)
print(scores.mean())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment