Skip to content

Instantly share code, notes, and snippets.

@shunsukeaihara
Created January 23, 2013 08:44
Show Gist options
  • Save shunsukeaihara/4603227 to your computer and use it in GitHub Desktop.
Save shunsukeaihara/4603227 to your computer and use it in GitHub Desktop.
Mixture model for graph
# -*- coding: utf-8 -*-
import math
import numpy as NP
from scipy.maxentropy import logsumexp
from operator import itemgetter
class NewmanEM:
def __init__(sel):
pass
def ll(self):
"""対数尤度の計算"""
res=0.0
for i in range(self.size):
for r in range(self.groups):
sum=0.0
for j in self.outEdge[i]:
sum+=self.theta[r,j]
if math.exp(self.q[i,r])>0.0:
res+=math.exp(self.q[i,r])*(self.pi[r]+sum)
return res
def initialize(self,gp,mat,th=0.0):
"""パラメータの初期化"""
self.groups=gp
self.size=mat.shape[0]
x=NP.random.random(gp)
self.pi=x/NP.sum(x)
x=NP.matrix(NP.random.random((gp,self.size)))
self.theta=x/x.sum(1)
self.pi = NP.log(self.pi)
self.theta = NP.log(self.theta)
self.q=NP.matrix(NP.zeros((self.size,gp)))
self.outEdge=[]#in edgeの保存
for i in range(self.size):
row=mat[i]
self.outEdge.append([i for i in range(len(row)) if row[i]>th])
self.inEdge=[]#out edgeの保存
mat=mat.T
for i in range(self.size):
row=mat[i]
self.inEdge.append([i for i in range(len(row)) if row[i]>th])
pass
def clustering(self,mat,vocab,gp):
pi,q,theta,ll=self.em(mat,gp,1000)
self.showGmResult(q,theta,vocab,gp)
def showGmResult(self,q,theta,vocab,gp):
for r in range(gp):
dic={}
for i in range(len(vocab)):
th=theta[r,i]
dic[vocab[i]]=math.exp(th)
ar=sorted(dic.items(), key=itemgetter(1), reverse=True)
print "group " + str(r)
for x in ar[0:10]:
print x[0] + ":" + str(x[1])
print "----"
for r in range(gp):
for i in range(len(vocab)):
th=math.exp(q[i,r])
dic[vocab[i]]=th
ar=sorted(dic.items(), key=itemgetter(1), reverse=True)
print "group " + str(r)
for x in ar[0:10]:
print x[0] + ":" + str(x[1])
print "----"
def em(self,mat,gp,step):
"""EMアルゴリズムで計算"""
self.initialize_em(gp,mat)
for st in range(step):
qold=self.q.copy()
#E-step qを計算
for i in range(self.size):
edge=self.outEdge[i]
for r in range(self.groups):
self.q[i,r]=self.pi[r]
for t in edge:
self.q[i,r]+=(self.theta[r,t])
pass#edge
pass#j
self.q[i]=self.q[i]-logsumexp(self.q[i])
pass
#M-step theta,piを計算
for r in range(self.groups):
ar=[]
for i in range(self.size):
ar.append(self.q[i,r]+NP.log(len(self.outEdge[i])))
for j in range(self.size):
edge=self.inEdge[j]
ar2=[]
for t in edge:
ar2.append(self.q[t,r])
self.theta[r,j]=logsumexp(ar2)-logsumexp(ar)
pass#j
pass#i
tq=self.q.T
for i in range(self.groups):
self.pi[i]=logsumexp(tq[i])-math.log(self.size)
deltasq=0.0
for i in range(self.size):
for j in range(self.groups):
deltasq+=((NP.exp(qold[i,j])-NP.exp(self.q[i,j]))**2)
delta=math.sqrt(deltasq)
print "iter " + str(st) + ",d " + str(delta)
if delta==0.0 or delta==float("inf"):
break
pass#end of iteration
return self.pi,self.q,self.theta,self.ll()
if __name__ == '__main__':
#隣接行列
mat=NP.array([[0.0,1,1,1,1,0,0,0,0],
[1.0,0,0,0,0,1,1,1,0],
[1,0,0,1,1,0,0,0,0],
[1,0,1,0,1,0,1,0,0],
[1,0,1,1,0,0,0,0,1],
[0,1,0,0,0,0,1,1,1],
[0,1,0,0,0,1,0,1,0],
[0,1,1,0,0,1,1,0,0],
[0,0,0,0,1,1,0,0,0]
])
em=NewmanEM()
pi,q,theta,ll=em.em(mat,2,1000)
print NP.exp(pi)
print NP.exp(q)
print NP.exp(theta)
print ll
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment