Last active
April 21, 2024 13:42
-
-
Save mblondel/656147 to your computer and use it in GitHub Desktop.
Kernel Perceptron
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mathieu Blondel, October 2010 | |
# License: BSD 3 clause | |
import numpy as np | |
from numpy import linalg | |
def linear_kernel(x1, x2): | |
return np.dot(x1, x2) | |
def polynomial_kernel(x, y, p=3): | |
return (1 + np.dot(x, y)) ** p | |
def gaussian_kernel(x, y, sigma=5.0): | |
return np.exp(-linalg.norm(x-y)**2 / (2 * (sigma ** 2))) | |
class Perceptron(object): | |
def __init__(self, T=1): | |
self.T = T | |
def fit(self, X, y): | |
n_samples, n_features = X.shape | |
self.w = np.zeros(n_features, dtype=np.float64) | |
self.b = 0.0 | |
for t in range(self.T): | |
for i in range(n_samples): | |
if self.predict(X[i])[0] != y[i]: | |
self.w += y[i] * X[i] | |
self.b += y[i] | |
def project(self, X): | |
return np.dot(X, self.w) + self.b | |
def predict(self, X): | |
X = np.atleast_2d(X) | |
return np.sign(self.project(X)) | |
class KernelPerceptron(object): | |
def __init__(self, kernel=linear_kernel, T=1): | |
self.kernel = kernel | |
self.T = T | |
def fit(self, X, y): | |
n_samples, n_features = X.shape | |
#np.hstack((X, np.ones((n_samples, 1)))) | |
self.alpha = np.zeros(n_samples, dtype=np.float64) | |
# Gram matrix | |
K = np.zeros((n_samples, n_samples)) | |
for i in range(n_samples): | |
for j in range(n_samples): | |
K[i,j] = self.kernel(X[i], X[j]) | |
for t in range(self.T): | |
for i in range(n_samples): | |
if np.sign(np.sum(K[:,i] * self.alpha * y)) != y[i]: | |
self.alpha[i] += 1.0 | |
# Support vectors | |
sv = self.alpha > 1e-5 | |
ind = np.arange(len(self.alpha))[sv] | |
self.alpha = self.alpha[sv] | |
self.sv = X[sv] | |
self.sv_y = y[sv] | |
print "%d support vectors out of %d points" % (len(self.alpha), | |
n_samples) | |
def project(self, X): | |
y_predict = np.zeros(len(X)) | |
for i in range(len(X)): | |
s = 0 | |
for a, sv_y, sv in zip(self.alpha, self.sv_y, self.sv): | |
s += a * sv_y * self.kernel(X[i], sv) | |
y_predict[i] = s | |
return y_predict | |
def predict(self, X): | |
X = np.atleast_2d(X) | |
n_samples, n_features = X.shape | |
#np.hstack((X, np.ones((n_samples, 1)))) | |
return np.sign(self.project(X)) | |
if __name__ == "__main__": | |
import pylab as pl | |
def gen_lin_separable_data(): | |
# generate training data in the 2-d case | |
mean1 = np.array([0, 2]) | |
mean2 = np.array([2, 0]) | |
cov = np.array([[0.8, 0.6], [0.6, 0.8]]) | |
X1 = np.random.multivariate_normal(mean1, cov, 100) | |
y1 = np.ones(len(X1)) | |
X2 = np.random.multivariate_normal(mean2, cov, 100) | |
y2 = np.ones(len(X2)) * -1 | |
return X1, y1, X2, y2 | |
def gen_non_lin_separable_data(): | |
mean1 = [-1, 2] | |
mean2 = [1, -1] | |
mean3 = [4, -4] | |
mean4 = [-4, 4] | |
cov = [[1.0,0.8], [0.8, 1.0]] | |
X1 = np.random.multivariate_normal(mean1, cov, 50) | |
X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50))) | |
y1 = np.ones(len(X1)) | |
X2 = np.random.multivariate_normal(mean2, cov, 50) | |
X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50))) | |
y2 = np.ones(len(X2)) * -1 | |
return X1, y1, X2, y2 | |
def gen_lin_separable_overlap_data(): | |
# generate training data in the 2-d case | |
mean1 = np.array([0, 2]) | |
mean2 = np.array([2, 0]) | |
cov = np.array([[1.5, 1.0], [1.0, 1.5]]) | |
X1 = np.random.multivariate_normal(mean1, cov, 100) | |
y1 = np.ones(len(X1)) | |
X2 = np.random.multivariate_normal(mean2, cov, 100) | |
y2 = np.ones(len(X2)) * -1 | |
return X1, y1, X2, y2 | |
def split_train(X1, y1, X2, y2): | |
X1_train = X1[:90] | |
y1_train = y1[:90] | |
X2_train = X2[:90] | |
y2_train = y2[:90] | |
X_train = np.vstack((X1_train, X2_train)) | |
y_train = np.hstack((y1_train, y2_train)) | |
return X_train, y_train | |
def split_test(X1, y1, X2, y2): | |
X1_test = X1[90:] | |
y1_test = y1[90:] | |
X2_test = X2[90:] | |
y2_test = y2[90:] | |
X_test = np.vstack((X1_test, X2_test)) | |
y_test = np.hstack((y1_test, y2_test)) | |
return X_test, y_test | |
def plot_margin(X1_train, X2_train, clf): | |
def f(x, w, b, c=0): | |
# given x, return y such that [x,y] in on the line | |
# w.x + b = c | |
return (-w[0] * x - b + c) / w[1] | |
pl.plot(X1_train[:,0], X1_train[:,1], "ro") | |
pl.plot(X2_train[:,0], X2_train[:,1], "bo") | |
# w.x + b = 0 | |
a0 = -4; a1 = f(a0, clf.w, clf.b) | |
b0 = 4; b1 = f(b0, clf.w, clf.b) | |
pl.plot([a0,b0], [a1,b1], "k") | |
pl.axis("tight") | |
pl.show() | |
def plot_contour(X1_train, X2_train, clf): | |
pl.plot(X1_train[:,0], X1_train[:,1], "ro") | |
pl.plot(X2_train[:,0], X2_train[:,1], "bo") | |
pl.scatter(clf.sv[:,0], clf.sv[:,1], s=100, c="g") | |
X1, X2 = np.meshgrid(np.linspace(-6,6,50), np.linspace(-6,6,50)) | |
X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))]) | |
Z = clf.project(X).reshape(X1.shape) | |
pl.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower') | |
pl.axis("tight") | |
pl.show() | |
def test_linear(): | |
X1, y1, X2, y2 = gen_lin_separable_data() | |
#X1, y1, X2, y2 = gen_lin_separable_overlap_data() | |
X_train, y_train = split_train(X1, y1, X2, y2) | |
X_test, y_test = split_test(X1, y1, X2, y2) | |
clf = Perceptron(T=3) | |
clf.fit(X_train, y_train) | |
y_predict = clf.predict(X_test) | |
correct = np.sum(y_predict == y_test) | |
print "%d out of %d predictions correct" % (correct, len(y_predict)) | |
plot_margin(X_train[y_train==1], X_train[y_train==-1], clf) | |
def test_kernel(): | |
X1, y1, X2, y2 = gen_non_lin_separable_data() | |
X_train, y_train = split_train(X1, y1, X2, y2) | |
X_test, y_test = split_test(X1, y1, X2, y2) | |
clf = KernelPerceptron(gaussian_kernel, T=20) | |
clf.fit(X_train, y_train) | |
y_predict = clf.predict(X_test) | |
correct = np.sum(y_predict == y_test) | |
print "%d out of %d predictions correct" % (correct, len(y_predict)) | |
plot_contour(X_train[y_train==1], X_train[y_train==-1], clf) | |
test_linear() | |
I want that in the scikit :)
self.alpha[i] += 1.0
should be self.alpha[i] += y[i]?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good catch, fixed :)