Created
November 18, 2015 01:05
-
-
Save stormxuwz/d0dddd2b4d5b9c63d2ea to your computer and use it in GitHub Desktop.
Some online learning algorithms (Perceptron, Perceptron with margin, Winnow, AdaGrad) implemented with NumPy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division
import numpy as np
class OnlineClf(object):
	"""Base class for online (mistake-driven) linear classifiers.

	The weight vector ``w`` stores the feature weights in ``w[:-1]`` and the
	bias term in ``w[-1]``. Subclasses must implement ``initializeWeights``
	and ``weightsUpdate``.
	"""
	def __init__(self,iterNum,R):
		# Weight vector (bias in the last slot); created by fit().
		self.w=None
		# Number of full passes over the training data.
		self.iterNum=iterNum
		# Mistake log: rows of [sample index, mistake flag]; turned into a
		# cumulative count at the end of fit().
		self.misNum=[[0,0]]
		self.accuracy=0
		self.name="online Clf"  # fixed typo: was "oneline Clf"
		# If R > 0, training stops after R consecutive correct predictions.
		self.R=R
	def fit(self,X,Y):
		"""Run the online training loop.

		X: (n_samples, n_features) array; Y: labels in {-1, +1}.
		"""
		self.w=self.initializeWeights(X.shape[1])
		# Reset the mistake log so fit() can be called more than once
		# (previously the ndarray conversion below broke a second fit()).
		self.misNum=[[0,0]]
		continousCorrect=0
		convergent=False
		for k in range(self.iterNum):
			if self.R>0 and convergent:
				break
			for i in range(X.shape[0]):
				sample_x=X[i,:].copy()
				sample_y=Y[i].copy()
				predScore=self.predict(sample_x)
				if sample_y*np.sign(predScore)<0:
					# Misclassification: log it and reset the correct streak.
					self.misNum.append([i+1,1])
					continousCorrect=0
				else:
					continousCorrect+=1
					if self.R>0 and continousCorrect>self.R:
						print("Convergent!")
						convergent=True
						break
				self.w=self.weightsUpdate(predScore,sample_x,sample_y)
		if (not convergent) and (self.R>0):
			print("not convergent!")
		# Convert the per-mistake log into a cumulative mistake count.
		self.misNum=np.array(self.misNum)
		self.misNum[:,1]=np.cumsum(self.misNum[:,1])
	def predict(self,X,score=True):
		"""Return raw decision scores (score=True) or hard labels in {-1, +1}."""
		wx=self.w[-1]+np.dot(X,self.w[:-1]).flatten()
		if score:
			return wx
		# Treat the boundary (wx == 0) as the negative label. np.sign would
		# return 0 there, contradicting the intended contract.
		return np.where(wx>0,1,-1)
	def weightsUpdate(self,predScore,x,y):
		'''
		Hook for subclasses: return the new weight vector after seeing (x, y).
		predScore: raw decision score for x
		x: a single sample
		y: truth label for x
		'''
		return 0
	def initializeWeights(self,n):
		'''
		Hook for subclasses: initialize and return the weight vector.
		n: the number of features in the data
		'''
		pass
	def evaluation(self,testX,testY):
		"""Return accuracy/error on (testX, testY) plus the final mistake count."""
		pred=self.predict(testX,score=False)
		accuracy=np.sum(pred==testY)/len(testY)
		return {"accuracy":accuracy,"misNum":self.misNum[-1,1],"accuracyNum":np.sum(pred==testY),"error":1-accuracy}
class Perceptron(OnlineClf):
	"""Classic perceptron with an optional positive update margin.

	Updates additively whenever y * score fails to exceed the margin.
	"""
	def __init__(self,dataIter=20,eta=1,margin=0,R=0):
		super(Perceptron, self).__init__(dataIter,R)
		self.eta=eta        # learning rate for the additive update
		self.margin=margin  # update whenever y * score <= margin
		self.name="Perceptron with margin" if self.margin>0 else "Perceptron"
	def __str__(self):
		params={"name":self.name,"eta":self.eta,"margin":self.margin,"R":self.R}
		return str(params)
	def weightsUpdate(self,predScore,x,y):
		"""Return the (possibly updated) weight vector after seeing (x, y)."""
		updated=self.w.copy()
		# No update when the sample clears the margin; otherwise move the
		# weights (and bias, stored last) toward y * x.
		if y*predScore<=self.margin:
			updated[:-1]+=self.eta*y*x
			updated[-1]+=self.eta*y
		return updated
	def initializeWeights(self,n):
		"""All-zero feature weights plus a zero bias slot."""
		return np.zeros(n+1)
class Perceptron2(OnlineClf):
	"""Perceptron variant with an asymmetric margin.

	Positive samples must clear ``beta * margin`` to avoid an update;
	negative samples only need to clear ``margin``.
	"""
	def __init__(self,dataIter=20,eta=1,margin=0,R=0,beta=2):
		super(Perceptron2, self).__init__(dataIter,R)
		self.eta=eta        # learning rate for the additive update
		self.margin=margin  # base margin (scaled by beta for positive samples)
		self.beta=beta      # margin multiplier applied to positive samples
		# The original if/else assigned the identical name in both branches;
		# collapsed the dead branch into a single assignment.
		self.name="Modified Perceptron"
	def __str__(self):
		return str({"name":self.name,"eta":self.eta,"margin":self.margin,"R":self.R,"beta":self.beta})
	def weightsUpdate(self,predScore,x,y):
		"""Update the weights unless the sample clears its class-specific margin."""
		w=self.w.copy()
		if y>0 and y*predScore>self.beta*self.margin:
			return w
		if y<0 and y*predScore>self.margin:
			return w
		# Margin violation: standard additive perceptron step (bias last).
		w[:-1]=w[:-1]+self.eta*y*x
		w[-1]=w[-1]+self.eta*y
		return w
	def initializeWeights(self,n):
		"""All-zero feature weights plus a zero bias slot."""
		return np.zeros(n+1)
class Winnow(OnlineClf):
	"""Winnow: a multiplicative-update linear classifier, optionally with margin."""
	def __init__(self,dataIter=20,alpha=1.1,margin=0,R=0):
		super(Winnow, self).__init__(dataIter,R)
		self.alpha=alpha    # multiplicative promotion/demotion factor
		self.margin=margin  # update whenever y * score <= margin
		self.name="Winnow with margin" if self.margin>0 else "Winnow"
	def __str__(self):
		return str({"name":self.name,"alpha":self.alpha,"margin":self.margin,"R":self.R})
	def initializeWeights(self,n):
		"""Feature weights start at 1; the bias slot starts at -n."""
		weights=np.zeros(n+1)+1
		weights[-1]=-n
		return weights
	def weightsUpdate(self,predScore,x,y):
		"""Multiplicatively promote/demote feature weights on a margin violation.

		The bias term (last slot) is never changed by the update.
		"""
		new_w=self.w.copy()
		if y*predScore<=self.margin:
			# alpha**(y*x) promotes features where y*x > 0, demotes where < 0.
			new_w[:-1]*=self.alpha**(y*x)
		return new_w
class AdaGrad(OnlineClf):
	"""Hinge-loss SGD with per-coordinate AdaGrad step sizes."""
	def __init__(self,dataIter=20,eta=1.5,R=0):
		super(AdaGrad, self).__init__(dataIter,R)
		self.eta=eta    # base learning rate, scaled per-coordinate below
		self.name="AdaGrad"
		# Running sum of squared gradients; (re)set in initializeWeights().
		self.Gt=None
	def __str__(self):
		return str({"name":self.name,"eta":self.eta,"R":self.R})
	def weightsUpdate(self,predScore,x,y):
		"""One AdaGrad step on the hinge loss; no-op when y * score > 1."""
		new_w=self.w.copy()
		if y*predScore<=1:
			# Sub-gradient of the hinge loss at (x, y): [-y*x, -y] (bias last).
			grad=self.w.copy()
			grad[:-1]=-y*x
			grad[-1]=-y
			self.Gt=self.Gt+grad**2
			# Only step coordinates with accumulated curvature, which also
			# avoids dividing by zero.
			nz,=np.where(self.Gt>0)
			new_w[nz]-=self.eta*grad[nz]/np.sqrt(self.Gt[nz])
		return new_w
	def initializeWeights(self,n):
		"""Zero weights and bias; also reset the squared-gradient accumulator."""
		self.Gt=np.zeros(n+1)
		return np.zeros(n+1)
if __name__ == '__main__':
	import comparison
	handler=comparison.OnlineClfComparison()
	handler.readData('data_1_500.mat')
	# First 5000 rows for training, last 5000 for the held-out evaluation.
	X=handler.trainX[:5000,:]
	Y=handler.trainY[:5000]
	testX=handler.trainX[-5000:,:]
	testY=handler.trainY[-5000:]
	clfList=[Perceptron,Winnow,AdaGrad]
	for clf in clfList:
		myClf=clf()
		myClf.fit(X,Y)
		# One line per classifier: its parameters, then its evaluation dict
		# (same output as the original Python 2 trailing-comma print pair).
		print(myClf, myClf.evaluation(testX,testY))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.