-
-
Save LaoLiulaoliu/fd523d309f6ccb27d227 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import numpy as np
import pandas as pd
import pylab as pl
from sklearn import linear_model
from sklearn import svm
from sklearn import tree
from sklearn.metrics import confusion_matrix

# Plot extents and the resolution of the sampling grid.
x_min, x_max = 0, 15
y_min, y_max = 0, 10
step = .1

# To plot the boundary, build a table holding every grid point in the
# plotting area, then label each point "red" or "green" depending on whether
# it lies inside the disc centred at (7.5, 5).
xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
df = pd.DataFrame(data={'x': xx.ravel(), 'y': yy.ravel()})

# Squared distance from the centre (7.5, 5); points whose squared distance is
# at most 15 are red, everything else is green.
df['color_gauge'] = (df.x - 7.5)**2 + (df.y - 5)**2
df['color'] = np.where(df.color_gauge <= 15, "red", "green")
# Integer class label used for training: 0 = red, 1 = green.
df['color_as_int'] = (df.color != "red").astype(int)

print("Points on flag:")
print(df.groupby('color').size())

# Index of the subplot being drawn in the 2x2 grid; slot 1 shows the true labels.
figure = 1

# Draw the complete dataset, one scatter layer per colour.
pl.subplot(2, 2, figure)
for shade in df.color.unique():
    points = df[df.color == shade]
    pl.scatter(points.x, points.y, color=shade)
pl.title('Actual')

# Train on the part of the grid with x < 10 and test on the remainder.
train_idx = df.x < 10
train = df[train_idx]
# `~` is the logical-NOT operator for boolean masks. The copy makes `test` an
# independent frame, so new columns can be assigned to it without pandas
# warning about writing to a slice of `df`.
test = df[~train_idx].copy()

print("Training Set Size: %d" % len(train))
print("Test Set Size: %d" % len(test))

# train using the x and y position coordinates
cols = ["x", "y"]

# Classifiers to compare, keyed by the name used for plot titles and output.
clfs = {
    # `degree` applies only to the polynomial kernel and is documented as an
    # integer; SVC's default RBF kernel ignores it, so the stray `degree=0.5`
    # has been dropped.
    "SVM": svm.SVC(),
    "Logistic": linear_model.LogisticRegression(),
    "Decision Tree": tree.DecisionTreeClassifier(),
}

# The train/test boundary is identical on every subplot, so build it once:
# a vertical line at x = 10 spanning the full height of the plot
# (everything to the right of the line is in the test set).
train_line_y = np.linspace(y_min, y_max)  # evenly spaced array from y_min to y_max
train_line_x = np.repeat(10, len(train_line_y))  # repeat 10 (threshold for the training set) n times

# racehorse different classifiers and plot the results
for clf_name, clf in clfs.items():
    figure += 1

    # train the classifier
    clf.fit(train[cols], train.color_as_int)

    # get the predicted values from the test set
    test['predicted_color_as_int'] = clf.predict(test[cols])
    test['pred_color'] = np.where(test.predicted_color_as_int == 0, "red", "green")

    # create a new subplot on the plot
    pl.subplot(2, 2, figure)

    # plot each predicted color, using only the rows predicted as that color
    for color in test.pred_color.unique():
        predicted = test[test.pred_color == color]
        pl.scatter(predicted.x, predicted.y, color=color)

    # plot the training set as well
    for color in train.color.unique():
        known = train[train.color == color]
        pl.scatter(known.x, known.y, color=color)

    # add a black, dashed line to the subplot ('k--' already selects black)
    pl.plot(train_line_x, train_line_y, 'k--')

    pl.title(clf_name)

    print("Confusion Matrix for %s:" % clf_name)
    print(confusion_matrix(test.color, test.pred_color))

pl.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment