Created
September 5, 2012 04:12
-
-
Save ygabo/3630272 to your computer and use it in GitHub Desktop.
Naive Bayes Implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.io | |
import matplotlib.pyplot as pl | |
class Classifier(object):
    """Skeleton classifier that stores and persists two parameter arrays.

    The persisted attributes are listed in ``self.params`` (``logpi`` and
    ``logtheta``).  NOTE(review): the names suggest log class priors and log
    feature likelihoods of a naive Bayes model -- confirm once ``fit`` and
    ``predict`` are actually implemented.
    """

    def __init__(self):
        # Names of the attributes written by save_params / read by load_params.
        self.params = ['logpi', 'logtheta']

    def fit(self, X, y):
        """Load the training matrix and reset the parameters to placeholders.

        NOTE(review): the ``X`` and ``y`` arguments are currently ignored; they
        are overwritten with the contents of ``tweets_small.mtx`` (column 0 as
        ``y``, the remaining columns as ``X``) and no estimation is performed
        yet.  This behaviour is kept unchanged so existing callers still work.
        """
        data = scipy.io.mmread('tweets_small.mtx').tocsr()
        X = data[:, 1:]
        y = data[:, 0]
        self.logpi = 0
        self.logtheta = 0

    def predict(self, X):
        """Predict labels for ``X``.

        Raises:
            NotImplementedError: always; the method had no body in the
                original file, which made the whole class a syntax error.
        """
        raise NotImplementedError('Classifier.predict is not implemented')

    def save_params(self, fname):
        """Write every attribute named in ``self.params`` to an ``.npz`` file.

        ``np.savez`` appends ``.npz`` to ``fname`` when it is a string that
        does not already end with that extension.
        """
        params = {name: getattr(self, name) for name in self.params}
        np.savez(fname, **params)

    def load_params(self, fname):
        """Restore the attributes named in ``self.params`` from an archive.

        ``fname`` may be the exact string previously given to ``save_params``:
        if that string has no ``.npz`` suffix and no such file exists, the
        suffix that ``np.savez`` added is appended before loading.  Values
        come back as NumPy arrays (0-d arrays for scalars).
        """
        import os

        if (isinstance(fname, str) and not fname.endswith('.npz')
                and not os.path.exists(fname)):
            fname = fname + '.npz'
        # The context manager closes the underlying zip file handle.
        with np.load(fname) as params:
            for name in self.params:
                setattr(self, name, params[name])
def ridge(X, y, d2):
    """Return the ridge-regression coefficients for ``X`` and ``y``.

    Solves ``(X^T X + d2 * I) theta = X^T y``.

    Args:
        X: 2-D array with one row per sample and one column per feature.
        y: Target values, one per row of ``X``.
        d2: Penalty weight added to the diagonal of ``X^T X``.

    Returns:
        The coefficient array ``theta`` with one entry per column of ``X``.

    Raises:
        numpy.linalg.LinAlgError: if the penalised matrix is singular.
    """
    gram = np.dot(X.T, X)
    # Size the identity from the data instead of assuming exactly 8 features.
    penalised = gram + d2 * np.identity(gram.shape[0])
    # Solving the linear system avoids forming an explicit inverse.
    return np.linalg.solve(penalised, np.dot(X.T, y))
def lasso(X, y, d2, tol=1e-5):
    """Return lasso coefficients computed by cyclic coordinate descent.

    The iteration is warm-started from ``ridge(X, y, d2)`` and repeats full
    sweeps over the coordinates until the summed absolute change of the
    coefficients over one sweep is at most ``tol``.

    Args:
        X: 2-D array with one row per sample and one column per feature.
        y: 1-D array of targets, one per row of ``X``.
        d2: Soft-threshold level applied to each coordinate statistic.
        tol: Convergence threshold on the L1 change between sweeps.

    Returns:
        1-D array with one coefficient per column of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_features = X.shape[1]

    previous = np.zeros(n_features)
    theta = np.array(ridge(X, y, d2), dtype=float)

    # a[j] = 2 * sum_i X[i, j]**2 does not depend on theta, so compute it once.
    # It is sized by the number of features (columns), not samples.
    a = 2.0 * np.sum(X ** 2, axis=0)

    while np.sum(np.abs(theta - previous)) > tol:
        previous = theta.copy()
        for j in range(n_features):
            column = X[:, j]
            # Residual with feature j's own contribution added back in; uses
            # the coordinates already updated earlier in this sweep.
            partial_residual = y - np.dot(X, theta) + theta[j] * column
            c_j = 2.0 * np.dot(column, partial_residual)
            # Soft-thresholding update for coordinate j.
            if c_j < -d2:
                theta[j] = (c_j + d2) / a[j]
            elif c_j > d2:
                theta[j] = (c_j - d2) / a[j]
            else:
                theta[j] = 0.0
    return theta
# Plot the lasso coefficient paths for the data in 'prostate.data'.
# The last column of the file is used as the response, the rest as features.
data = np.loadtxt('prostate.data')
fig = pl.figure()
d2 = np.logspace(-5, 10, num=100)
y = data[:, -1]
X = data[:, 0:-1]

# Centre the response; centre and scale every feature column.
y -= np.mean(y)
X -= np.mean(X, axis=0)
X /= np.std(X, axis=0)

# One row of coefficients per penalty value, one column per feature.
paths = np.array([lasso(X, y, penalty) for penalty in d2])

# Horizontal axis: L1 norm of the coefficient vector at each penalty.
dfa = np.sum(np.absolute(paths), axis=1)

# A 2-D second argument draws one line per column, i.e. one per feature.
pl.plot(dfa, paths, '-')
# NOTE(review): the original draws feature 1 a second time on top of the
# full set; kept so the figure is unchanged -- confirm whether it is needed.
pl.plot(dfa, paths[:, 1], '-')
pl.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment