Skip to content

Instantly share code, notes, and snippets.

@nicolamontecchio
Created August 25, 2014 19:50
Show Gist options
  • Save nicolamontecchio/69dd8eb9597d51b2e0c9 to your computer and use it in GitHub Desktop.
Save nicolamontecchio/69dd8eb9597d51b2e0c9 to your computer and use it in GitHub Desktop.
Template for logistic regression on data read from a CSV file, including a precision-recall plot
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import precision_recall_curve
# Path to the input CSV file; PATH_TO_FILE_HERE is a placeholder that must be
# replaced with a real path before the script is run.
# Each row is expected to look like: itemid,label,feature_1,...,feature_n
dfp = PATH_TO_FILE_HERE # format is csv with itemid,label,features
def readff(fpath):
    """Read a comma-separated feature file.

    Each non-blank line must have the form ``itemid,label,f1,f2,...`` where
    ``label`` parses as an int and every feature parses as a float.

    Args:
        fpath: Path of the CSV file to read.

    Returns:
        A tuple ``(ids, labels, features)``: ``ids`` is a list of the item-id
        strings, ``labels`` is a numpy array of ints and ``features`` is a
        numpy array with one row of floats per input line.

    Raises:
        ValueError: If a label or a feature cannot be parsed as a number.
        IndexError: If a non-blank line has no label column.
    """
    trids, labs, feats = [], [], []
    # The context manager guarantees the file is closed, even on a parse error.
    with open(fpath) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(',')
            trids.append(fields[0])
            labs.append(int(fields[1]))
            feats.append(np.array([float(f) for f in fields[2:]]))
    return trids, np.array(labs), np.array(feats)
# Linear classifier trained with stochastic gradient descent, without an
# intercept term.
# NOTE(review): no `loss` is passed, so scikit-learn's default loss is used;
# per its documentation that is the hinge loss (a linear SVM), not the logistic
# loss -- confirm whether logistic regression was actually intended here.
classifier = SGDClassifier(fit_intercept=False)
# T: item ids, L: integer labels, X: feature rows, as returned by readff.
T, L, X = readff(dfp)
def shuffle(X, Y):
    """Shuffle ``X`` and ``Y`` in place with the same permutation.

    The global numpy RNG state is saved before ``X`` is shuffled and restored
    before ``Y`` is shuffled, so both calls draw the same random sequence and
    element ``i`` of ``X`` stays paired with element ``i`` of ``Y``.

    Args:
        X: Mutable sequence or array, shuffled along its first axis.
        Y: Mutable sequence or array with the same length as ``X``.

    Returns:
        The same ``X`` and ``Y`` objects that were passed in, now shuffled.

    Raises:
        ValueError: If ``X`` and ``Y`` differ in length; replaying the RNG
            state would then not yield matching permutations.
    """
    if len(X) != len(Y):
        raise ValueError(
            "X and Y must have the same length, got %d and %d"
            % (len(X), len(Y))
        )
    state = np.random.get_state()
    np.random.shuffle(X)
    # Rewind the generator so Y receives exactly the permutation applied to X.
    np.random.set_state(state)
    np.random.shuffle(Y)
    return X, Y
# Hold out 10% of the rows for validation; ids, labels and features are split
# together so they stay aligned.
# NOTE(review): train_test_split is imported from sklearn.cross_validation in
# this file; newer scikit-learn releases provide it from
# sklearn.model_selection instead -- adjust the import if it fails.
Tt, Tv, Lt, Lv, Xt, Xv = train_test_split(T, L, X, test_size=0.1)
Xt, Lt = shuffle(Xt, Lt)
classifier.fit(Xt, Lt)

# Signed distances to the decision boundary for the validation rows. They get
# their own name so the validation ids in Tv are not overwritten.
scores = classifier.decision_function(Xv)
# For a binary problem classes_ holds the two labels in sorted order, and a
# positive decision score favours the second one.
negative_label, positive_label = classifier.classes_

# NOTE(review): clf() and plot() are not imported anywhere in this file; they
# are presumably expected from an interactive pylab namespace -- confirm.
clf()
# Precision-recall curve with the positive class as the target.
pp, rr, _ = precision_recall_curve(Lv, scores, pos_label=positive_label)
plot(rr, pp)
# Precision-recall curve with the negative class as the target: negate the
# scores and name the negative label explicitly, which works for any label
# encoding ({0, 1} as well as {-1, +1}).
pp, rr, _ = precision_recall_curve(Lv, -scores, pos_label=negative_label)
plot(rr, pp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment