Skip to content

Instantly share code, notes, and snippets.

@fnielsen
Last active December 26, 2015 18:29
Show Gist options
  • Save fnielsen/7195096 to your computer and use it in GitHub Desktop.
Save fnielsen/7195096 to your computer and use it in GitHub Desktop.
def accuracy(truth, predicted):
    """Return the fraction of positions where `predicted` equals `truth`.

    Elements are paired by position (as `zip` yields them), not by any
    index labels the inputs may carry.

    Parameters:
        truth: sized iterable of ground-truth labels.
        predicted: sized iterable of predicted labels, same length as `truth`.

    Returns:
        Number of matching pairs divided by the number of pairs, as a float;
        0.0 when both inputs are empty.

    Raises:
        ValueError: if the two inputs differ in length.
    """
    total = len(truth)
    if total != len(predicted):
        # ValueError is an Exception subclass, so handlers written for the
        # previous bare Exception keep working.
        raise ValueError("Wrong sizes ...")
    if total == 0:
        # Avoid dividing by zero; an empty comparison scores zero.
        return 0.0
    # Count matches with a generator: works on both Python 2 and Python 3,
    # unlike a tuple-unpacking lambda or len() of a filter object.
    hits = sum(1 for expected, guess in zip(truth, predicted)
               if expected == guess)
    return float(hits) / total
import pandas as pd
# Load the Pima training and test sets (training file first); column 0 of
# each CSV becomes the row index.
Pima_tr, Pima_te = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("Pima.tr.csv", "Pima.te.csv")
)
# Per-class row counts of the training set. The result is not stored or
# printed, so it is only visible when run interactively.
Pima_tr.groupby("type").count()
class NoClassifier():
    """Baseline classifier that predicts "No" for every row."""

    def predict(self, x):
        """Return a Series of "No" labels, one per row of `x`.

        Only `x.shape[0]` is read; the values in `x` are never inspected.
        The returned Series carries a default integer index, not the index
        of `x`.
        """
        return pd.Series(["No"] * x.shape[0])
# Baseline: score the constant-"No" predictor against the test-set labels.
no_classifier = NoClassifier()
predicted = no_classifier.predict(Pima_te)
accuracy(Pima_te["type"], predicted)
from scipy.linalg import pinv
from numpy import asarray, hstack, mat, ones, where
class LinearClassifier():
    """Two-class linear classifier fitted by least squares.

    Model: y = X*b with b = pinv(X) * y, where X is the feature array with a
    column of ones appended (the intercept) and y codes the label "No" as -1
    and every other label as +1. Scores below zero are decoded as "No",
    all others as "Yes".

    Plain ndarrays and `.dot` are used throughout rather than `numpy.mat`,
    so the class does not depend on the matrix type or its `*` overloading.
    """

    def __init__(self):
        # Fitted weight column, intercept last; None until train() is called.
        self._parameters = None

    def _design_matrix(self, x):
        """Return `x` as a 2-D array with a trailing column of ones."""
        features = asarray(x)
        return hstack((features, ones((features.shape[0], 1))))

    def from_labels(self, y):
        """Encode labels as an (n, 1) array: -1 for "No", +1 otherwise."""
        return where(asarray(y) == "No", -1, 1).reshape(-1, 1)

    def to_labels(self, y):
        """Decode scores into a flat Series: "No" if score < 0 else "Yes"."""
        return pd.Series(where(asarray(y) < 0, "No", "Yes").ravel())

    def train(self, x, y):
        """Fit the weights from features `x` (one row per sample) and labels `y`."""
        self._parameters = pinv(self._design_matrix(x)).dot(self.from_labels(y))

    def predict(self, x):
        """Return a Series of "No"/"Yes" labels, one per row of `x`.

        Raises:
            RuntimeError: if called before train().
        """
        if self._parameters is None:
            raise RuntimeError("LinearClassifier.predict() called before train()")
        y_estimated = self._design_matrix(x).dot(self._parameters)
        return self.to_labels(y_estimated)
# Fit the least-squares classifier on the training set and score it on the
# test set. The first seven columns (selected by position) are the features;
# .iloc replaces the .ix indexer, which current pandas no longer provides.
lc = LinearClassifier()
lc.train(Pima_tr.iloc[:, :7], Pima_tr["type"])
predicted = lc.predict(Pima_te.iloc[:, :7])
accuracy(Pima_te["type"], predicted)
from numpy import where
# Feature frames (first seven columns, by position) and numeric targets
# ("No" -> -1, anything else -> +1) for the scikit-learn estimators below.
# .iloc replaces the .ix indexer, which current pandas no longer provides.
X_tr = Pima_tr.iloc[:, :7]
y_tr = where(Pima_tr["type"] == "No", -1, 1)
X_te = Pima_te.iloc[:, :7]
y_te = where(Pima_te["type"] == "No", -1, 1)
from sklearn.ensemble import RandomForestClassifier
# Random forest with default settings, fitted on the training split.
rfc = RandomForestClassifier().fit(X_tr, y_tr)
# Map the numeric predictions back to the string labels used by accuracy().
predicted = where(rfc.predict(X_te) == -1, "No", "Yes")
accuracy(Pima_te["type"], predicted)
# Same measure through the estimator's own scorer, on the numeric targets.
rfc.score(X_te, y_te)
# Modified slightly from http://scikit-learn.org/stable/auto_examples/plot_roc.html
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
# ROC curve of the random forest on the test set. Column 1 of predict_proba
# is used as the score; presumably the probability of the +1 ("Yes") class
# given the -1/+1 target coding -- confirm against rfc.classes_.
fpr, tpr, thresholds = roc_curve(y_te, rfc.predict_proba(X_te)[:,1])
# Area under the ROC curve, reported in the legend label.
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
# Display the figure.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment