tinylamb · April 9, 2014 14:10
diff --git a/rank.py b/rank.py
 # coding:utf-8
 __author__ = 'tinylamb'

 import os
 import math
 FILEPATH = os.getcwd()  # data and prediction files are resolved against the working directory
 PERIOD = 5  # width of one "week" bucket, in the time unit used by the data file
 # Floor division keeps NOW and MID integral (25 and 12) on both Python 2 and
 # Python 3; with `/` they become 25.4 and 12.7 under true division.
 NOW = 122 // PERIOD + 1 # 122
 MID = NOW // 2  # Pick() only accepts brands whose latest bucket is later than this
 GAP = 0.5  # not referenced anywhere in the visible source
 SIMILAR = 0.3  # Jaccard value above which two brands are grouped together


 def SCORE(c, b, f, a):
     """Return the preference score of one (brand, week) cell.

     c, b, f, a -- the click, buy, fav and add counters of the cell.
     """
     return math.log(c + 2) * (pow(b, 2) + 1) + f + a + 1


 def Initial(filename, period=None):
     """Load the interaction log into nested per-user counters.

     Every non-blank line of `filename` holds four whitespace-separated
     fields: user id, brand id, action type and time.

     filename -- path of the log file to read.
     period   -- bucket width; the module-level PERIOD is used when omitted.

     Returns {user: {brand: {week: [n0, n1, n2, n3]}}} where `week` is the
     integer `time // period` and n<k> counts the actions of type k.
     """
     if period is None:
         period = PERIOD
     user = {}
     # The context manager closes the file even when a line fails to parse.
     with open(filename, 'r') as f:
         for line in f:
             fields = line.split()  # userid, brandid, type, time
             if not fields:
                 continue  # tolerate blank lines instead of raising IndexError
             # Floor division keeps the bucket keys integral on Python 2 and 3.
             week = int(fields[3]) // period
             brands = user.setdefault(fields[0], {})
             weeks = brands.setdefault(fields[1], {})
             counts = weeks.setdefault(week, [0, 0, 0, 0])
             counts[int(fields[2])] += 1
     return user

 def Cluster(filename, period=None):
     """Score every (user, week, brand) cell and dump the scores to cluster.txt.

     filename -- path of the log file (user id, brand id, action type, time
                 per line, whitespace separated).
     period   -- bucket width; the module-level PERIOD is used when omitted.

     Returns {user: {week: {brand: score}}} where `week` is the string form
     of `time // period` and `score` is rounded to three decimals.

     Side effect: rewrites `cluster.txt` in the current working directory
     with one "user<TAB>brand<TAB>score<TAB>week" line per cell.
     """
     if period is None:
         period = PERIOD
     cluster = {}
     with open(filename, 'r') as src:
         for line in src:
             fields = line.split()
             if not fields:
                 continue  # tolerate blank lines
             # Floor division so the key is '0', not '0.6', under true division.
             week = str(int(fields[3]) // period)
             weeks = cluster.setdefault(fields[0], {})
             brands = weeks.setdefault(week, {})
             counts = brands.setdefault(fields[1], [0, 0, 0, 0])
             counts[int(fields[2])] += 1
     # `with` guarantees the output is flushed and closed before returning.
     with open(os.getcwd() + '/cluster.txt', 'w') as out:
         for uid in cluster:
             weeks = cluster[uid]
             for week in weeks:
                 brands = weeks[week]
                 for bid in brands:
                     click, buy, fav, add = brands[bid]
                     # Same shape as SCORE but without the trailing "+ 1"
                     # (kept exactly as in the original formula).
                     score = round(math.log(click + 2) * (pow(buy, 2) + 1) + fav + add, 3)
                     brands[bid] = score
                     out.write(uid + '\t' + bid + '\t' + str(score) + '\t' + week + '\n')
     return cluster

 def Pref(user):
     """Convert raw action counters into per-week preference scores.

     user -- {user: {brand: {week: [click, buy, fav, add]}}}.

     Returns a new dict {user: {brand: {week: score}}} where each score is
     SCORE(click, buy, fav, add) rounded to three decimals.  `user` itself
     is left unmodified.
     """
     # Build fresh dicts at every level: dict.copy() is shallow, so rewriting
     # the inner values of a copy would also overwrite the caller's counters
     # (and make a second call fail on the already-scored floats).
     pref = {}
     for uid in user:
         scored_brands = pref[uid] = {}
         for bid in user[uid]:
             scored_weeks = scored_brands[bid] = {}
             for week, counts in user[uid][bid].items():
                 score = SCORE(counts[0], counts[1], counts[2], counts[3])
                 scored_weeks[week] = round(score, 3)
     return pref

 def Jaccard(b1, b2):
     """Return the Jaccard index of the key sets of two mappings.

     b1, b2 -- mappings; only their keys take part in the comparison.

     The result is the float ratio of shared keys to all distinct keys.
     Two empty mappings raise ZeroDivisionError.
     """
     first = frozenset(b1.keys())
     second = frozenset(b2.keys())
     shared = first & second
     combined = first | second
     return float(len(shared)) / len(combined)


 def Simi(pref, uid):
     """Build the pairwise brand-similarity table of one user.

     pref -- {user: {brand: {week: score}}}.
     uid  -- key of the user to inspect.

     Returns {brand: {other_brand: similarity}} for every ordered pair of
     the user's brands (each brand is also paired with itself), where the
     similarity is Jaccard() applied to the two brands' week mappings.
     """
     weeks_by_brand = pref[uid]
     table = {}
     for left in weeks_by_brand:
         table[left] = dict(
             (right, Jaccard(weeks_by_brand[left], weeks_by_brand[right]))
             for right in weeks_by_brand
         )
     return table

 def Recomend_history(pref, uid):
     """Recommend brands for `uid` based on the user's own history.

     For each brand of the user, the brands whose similarity (from Simi())
     exceeds SIMILAR form a group, and Pick() nominates at most one brand
     of that group.  A brand is recommended once it has been nominated at
     least four times.

     pref -- {user: {brand: {week: score}}}.
     uid  -- key of the user to recommend for.

     Returns a list of brand ids.
     """
     brand = pref[uid]
     sim_matrix = Simi(pref, uid)
     votes = {}
     # The original guarded this loop with `done.has_key(b)`, but `done` was
     # never filled, so the check never fired; it is dropped because
     # dict.has_key() no longer exists on Python 3.
     for b in brand:
         similar = [k for k, v in sim_matrix[b].items() if v > SIMILAR]
         similar.append(b)
         p = Pick(pref, uid, similar)
         if p != '':
             votes[p] = votes.get(p, 0) + 1
     # `v / 2 >= 2` holds exactly when v >= 4, under integer and true division.
     return [k for k, v in votes.items() if v >= 4]

 def Pick(pref, uid, similar):
     """Choose the strongest brand of a group if it is still active.

     pref    -- {user: {brand: {week: score}}}.
     uid     -- key of the user.
     similar -- iterable of brand ids belonging to that user.

     Returns the brand with the largest summed score, provided its most
     recent week is later than MID and that week's score exceeds 1.1;
     otherwise returns ''.
     """
     maxscore = 0
     predict = ''
     for s in similar:
         score = sum(pref[uid][s].values())  # total over {w1: s1, w2: s2, ...}
         if score > maxscore:
             maxscore = score
             predict = s
     if predict == '':
         # No candidate had a positive total (or `similar` was empty);
         # looking up pref[uid][''] would raise KeyError.
         return ''
     line = pref[uid][predict]
     recent = max(line.keys())
     if recent > MID and line[recent] > 1.1:
         return predict
     return ''



 def Hot(user, uid, now=None):
     """Compute the heat score of every brand one user interacted with.

     user -- {user: {brand: {week: [click, buy, fav, add]}}}.
     uid  -- key of the user to score.
     now  -- index of the current week bucket; the module-level NOW is
             used when omitted.

     Returns {brand: [bought, score]} where `bought` is 1 when the summed
     buy counter is non-zero and 0 otherwise.

     Every week index must be smaller than `now`; otherwise the age term
     below is zero or negative and the score is undefined.
     """
     if now is None:
         now = NOW
     brand = user[uid]  # brand : {w1: [0,1,2,3], w2: [0,1,2,3]}
     hot = {}
     for b in brand:
         week = sorted(brand[b].keys())
         frequence = len(week)  # number of distinct weeks with activity
         first = week[0]
         last = week[-1]
         click = sum(brand[b][w][0] for w in week)
         buy = sum(brand[b][w][1] for w in week)
         fav = sum(brand[b][w][2] for w in week)
         add = sum(brand[b][w][3] for w in week)
         # Distance from `now` to a point between the first and last active
         # week, with the last week weighted three times as much.
         age = now - (3 * last / 4.0 + first / 4.0)
         score = (math.log(click + 2) * (pow((1 + buy), 2) + 1) + frequence * (1 + fav + add)) / pow(age, 1.2)
         # Record [ever bought, score] per brand; the original author notes
         # that the score frequencies follow an exponential distribution.
         hot[b] = [1 if buy != 0 else 0, score]
     return hot
        #if buy > 0: # 购买过的品牌
        #    brand_buy.append(score)
        #if current in week[-4:]:
        #    lastmonth.append(score)
    # 计算阈值
    #m = sum(hot.values()) / len(hot) # 计算均值 ,lambda的似然估计是 1/m
    #b = (len(brand_buy) + 0.0) / len(brand) # 购买品牌占比
    #if b != 0:
    #    bound = m * math.log(1 / b)
    #    ranklist = [k for k,v in hot.items() if v >= bound]
    #else: #如果没有购买历史如何推荐
    #    return []
    #return ranklist
    #return hot

 def RecForMe(hot):
     """Select the brands whose heat score clears a per-user threshold.

     hot -- {brand: [bought (0 or 1), score]}.

     The threshold is mean(score) * ln(1 / r), where r is the fraction of
     brands flagged as bought.  Brands scoring at least the threshold are
     returned, sorted in descending order of their [bought, score] pair, so
     bought brands come first.

     Returns [] when `hot` is empty or no brand was bought.
     """
     if not hot:
         return []  # nothing to rank; also avoids dividing by zero below
     total = float(len(hot))
     b = sum(v[0] for v in hot.values()) / total
     if b == 0:
         return []
     m = sum(v[1] for v in hot.values()) / total
     bound = m * math.log(1 / b)
     reclist = [(k, v) for k, v in hot.items() if v[1] >= bound]
     reclist.sort(key=lambda x: x[1], reverse=True)
     return [i[0] for i in reclist]

 def Tianmao(user):
     """Write the heat-based recommendations of every user to predict.txt.

     user -- {user: {brand: {week: [click, buy, fav, add]}}}.

     For each user, Hot() scores the brands and RecForMe() selects the
     ones to recommend.  Users with a non-empty selection get one line
     "user<TAB>brand1,brand2,..." in FILEPATH + '/predict.txt', which is
     overwritten.
     """
     # `with` closes the file even if scoring one of the users raises.
     with open(FILEPATH + '/predict.txt', 'w') as f:
         for u in user:
             predict = RecForMe(Hot(user, u))
             if not predict:
                 continue
             f.write(u + '\t' + ','.join(predict) + '\n')

 def Tianmao_v1(user):
     """Write the history-based recommendations of every user to predict.txt.

     user -- {user: {brand: {week: [click, buy, fav, add]}}}.

     The counters are converted to scores with Pref(), then
     Recomend_history() is run per user.  Users with a non-empty result get
     one line "user<TAB>brand1,brand2,..." in FILEPATH + '/predict.txt',
     which is overwritten.
     """
     # `with` closes the file even if Pref() or a recommendation raises.
     with open(FILEPATH + '/predict.txt', 'w') as f:
         pref = Pref(user)
         for u in user:
             rec = Recomend_history(pref, u)
             if rec:
                 f.write(u + '\t' + ','.join(rec) + '\n')

 if __name__ == '__main__':
     # Script entry point: load the log from the working directory and write
     # the heat-based predictions.
     source_path = FILEPATH + '/data.txt' # data.txt / train.txt
     Tianmao(Initial(source_path))

 #if __name__ == '__main__':
 #    filename = FILEPATH + '/train.txt'
 #    user = Initial(filename)
 #    Tianmao_v1(user)
	# coding:utf-8
	__author__ = 'tinylamb'

	import os
	import math
	FILEPATH = os.getcwd()  # data and prediction files are resolved against the working directory
	PERIOD = 5  # width of one "week" bucket, in the time unit used by the data file
	# Floor division keeps NOW and MID integral (25 and 12) on both Python 2 and
	# Python 3; with `/` they become 25.4 and 12.7 under true division.
	NOW = 122 // PERIOD + 1 # 122
	MID = NOW // 2  # Pick() only accepts brands whose latest bucket is later than this
	GAP = 0.5  # not referenced anywhere in the visible source
	SIMILAR = 0.3  # Jaccard value above which two brands are grouped together


	def SCORE(c, b, f, a):
	    """Return the preference score of one (brand, week) cell.

	    c, b, f, a -- the click, buy, fav and add counters of the cell.
	    """
	    return math.log(c + 2) * (pow(b, 2) + 1) + f + a + 1


	def Initial(filename, period=None):
	    """Load the interaction log into nested per-user counters.

	    Every non-blank line of `filename` holds four whitespace-separated
	    fields: user id, brand id, action type and time.

	    filename -- path of the log file to read.
	    period   -- bucket width; the module-level PERIOD is used when omitted.

	    Returns {user: {brand: {week: [n0, n1, n2, n3]}}} where `week` is the
	    integer `time // period` and n<k> counts the actions of type k.
	    """
	    if period is None:
	        period = PERIOD
	    user = {}
	    # The context manager closes the file even when a line fails to parse.
	    with open(filename, 'r') as f:
	        for line in f:
	            fields = line.split()  # userid, brandid, type, time
	            if not fields:
	                continue  # tolerate blank lines instead of raising IndexError
	            # Floor division keeps the bucket keys integral on Python 2 and 3.
	            week = int(fields[3]) // period
	            brands = user.setdefault(fields[0], {})
	            weeks = brands.setdefault(fields[1], {})
	            counts = weeks.setdefault(week, [0, 0, 0, 0])
	            counts[int(fields[2])] += 1
	    return user

	def Cluster(filename, period=None):
	    """Score every (user, week, brand) cell and dump the scores to cluster.txt.

	    filename -- path of the log file (user id, brand id, action type, time
	                per line, whitespace separated).
	    period   -- bucket width; the module-level PERIOD is used when omitted.

	    Returns {user: {week: {brand: score}}} where `week` is the string form
	    of `time // period` and `score` is rounded to three decimals.

	    Side effect: rewrites `cluster.txt` in the current working directory
	    with one "user<TAB>brand<TAB>score<TAB>week" line per cell.
	    """
	    if period is None:
	        period = PERIOD
	    cluster = {}
	    with open(filename, 'r') as src:
	        for line in src:
	            fields = line.split()
	            if not fields:
	                continue  # tolerate blank lines
	            # Floor division so the key is '0', not '0.6', under true division.
	            week = str(int(fields[3]) // period)
	            weeks = cluster.setdefault(fields[0], {})
	            brands = weeks.setdefault(week, {})
	            counts = brands.setdefault(fields[1], [0, 0, 0, 0])
	            counts[int(fields[2])] += 1
	    # `with` guarantees the output is flushed and closed before returning.
	    with open(os.getcwd() + '/cluster.txt', 'w') as out:
	        for uid in cluster:
	            weeks = cluster[uid]
	            for week in weeks:
	                brands = weeks[week]
	                for bid in brands:
	                    click, buy, fav, add = brands[bid]
	                    # Same shape as SCORE but without the trailing "+ 1"
	                    # (kept exactly as in the original formula).
	                    score = round(math.log(click + 2) * (pow(buy, 2) + 1) + fav + add, 3)
	                    brands[bid] = score
	                    out.write(uid + '\t' + bid + '\t' + str(score) + '\t' + week + '\n')
	    return cluster

	def Pref(user):
	    """Convert raw action counters into per-week preference scores.

	    user -- {user: {brand: {week: [click, buy, fav, add]}}}.

	    Returns a new dict {user: {brand: {week: score}}} where each score is
	    SCORE(click, buy, fav, add) rounded to three decimals.  `user` itself
	    is left unmodified.
	    """
	    # Build fresh dicts at every level: dict.copy() is shallow, so rewriting
	    # the inner values of a copy would also overwrite the caller's counters
	    # (and make a second call fail on the already-scored floats).
	    pref = {}
	    for uid in user:
	        scored_brands = pref[uid] = {}
	        for bid in user[uid]:
	            scored_weeks = scored_brands[bid] = {}
	            for week, counts in user[uid][bid].items():
	                score = SCORE(counts[0], counts[1], counts[2], counts[3])
	                scored_weeks[week] = round(score, 3)
	    return pref

	def Jaccard(b1, b2):
	    """Return the Jaccard index of the key sets of two mappings.

	    b1, b2 -- mappings; only their keys take part in the comparison.

	    The result is the float ratio of shared keys to all distinct keys.
	    Two empty mappings raise ZeroDivisionError.
	    """
	    # Indentation restored: the pasted copy had lost its block structure.
	    w1 = set(b1.keys())
	    w2 = set(b2.keys())
	    return float(len(w1 & w2)) / len(w1 | w2)


	def Simi(pref, uid):
	    """Build the pairwise brand-similarity table of one user.

	    pref -- {user: {brand: {week: score}}}.
	    uid  -- key of the user to inspect.

	    Returns {brand: {other_brand: similarity}} for every ordered pair of
	    the user's brands (each brand is also paired with itself), where the
	    similarity is Jaccard() applied to the two brands' week mappings.
	    """
	    # Indentation restored: the pasted copy had lost its block structure.
	    brand = pref[uid]
	    simmatrix = {}
	    for b1 in brand:
	        row = simmatrix.setdefault(b1, {})
	        for b2 in brand:
	            row[b2] = Jaccard(brand[b1], brand[b2])
	    return simmatrix  # brand : {b1: s1, b2: s2}

	def Recomend_history(pref, uid):
	    """Recommend brands for `uid` based on the user's own history.

	    For each brand of the user, the brands whose similarity (from Simi())
	    exceeds SIMILAR form a group, and Pick() nominates at most one brand
	    of that group.  A brand is recommended once it has been nominated at
	    least four times.

	    pref -- {user: {brand: {week: score}}}.
	    uid  -- key of the user to recommend for.

	    Returns a list of brand ids.
	    """
	    brand = pref[uid]
	    sim_matrix = Simi(pref, uid)
	    votes = {}
	    # The original guarded this loop with `done.has_key(b)`, but `done` was
	    # never filled, so the check never fired; it is dropped because
	    # dict.has_key() no longer exists on Python 3.
	    for b in brand:
	        similar = [k for k, v in sim_matrix[b].items() if v > SIMILAR]
	        similar.append(b)
	        p = Pick(pref, uid, similar)
	        if p != '':
	            votes[p] = votes.get(p, 0) + 1
	    # `v / 2 >= 2` holds exactly when v >= 4, under integer and true division.
	    return [k for k, v in votes.items() if v >= 4]

	def Pick(pref, uid, similar):
	    """Choose the strongest brand of a group if it is still active.

	    pref    -- {user: {brand: {week: score}}}.
	    uid     -- key of the user.
	    similar -- iterable of brand ids belonging to that user.

	    Returns the brand with the largest summed score, provided its most
	    recent week is later than MID and that week's score exceeds 1.1;
	    otherwise returns ''.
	    """
	    maxscore = 0
	    predict = ''
	    for s in similar:
	        score = sum(pref[uid][s].values())  # total over {w1: s1, w2: s2, ...}
	        if score > maxscore:
	            maxscore = score
	            predict = s
	    if predict == '':
	        # No candidate had a positive total (or `similar` was empty);
	        # looking up pref[uid][''] would raise KeyError.
	        return ''
	    line = pref[uid][predict]
	    recent = max(line.keys())
	    if recent > MID and line[recent] > 1.1:
	        return predict
	    return ''



	def Hot(user, uid, now=None):
	    """Compute the heat score of every brand one user interacted with.

	    user -- {user: {brand: {week: [click, buy, fav, add]}}}.
	    uid  -- key of the user to score.
	    now  -- index of the current week bucket; the module-level NOW is
	            used when omitted.

	    Returns {brand: [bought, score]} where `bought` is 1 when the summed
	    buy counter is non-zero and 0 otherwise.

	    Every week index must be smaller than `now`; otherwise the age term
	    below is zero or negative and the score is undefined.
	    """
	    if now is None:
	        now = NOW
	    brand = user[uid]  # brand : {w1: [0,1,2,3], w2: [0,1,2,3]}
	    hot = {}
	    for b in brand:
	        week = sorted(brand[b].keys())
	        frequence = len(week)  # number of distinct weeks with activity
	        first = week[0]
	        last = week[-1]
	        click = sum(brand[b][w][0] for w in week)
	        buy = sum(brand[b][w][1] for w in week)
	        fav = sum(brand[b][w][2] for w in week)
	        add = sum(brand[b][w][3] for w in week)
	        # Distance from `now` to a point between the first and last active
	        # week, with the last week weighted three times as much.
	        age = now - (3 * last / 4.0 + first / 4.0)
	        # The pasted copy had lost the two `*` operators of this formula
	        # ("frequence (1 + ...)" and "3last"); they are restored here.
	        score = (math.log(click + 2) * (pow((1 + buy), 2) + 1) + frequence * (1 + fav + add)) / pow(age, 1.2)
	        # Record [ever bought, score] per brand; the original author notes
	        # that the score frequencies follow an exponential distribution.
	        hot[b] = [1 if buy != 0 else 0, score]
	    return hot
	#if buy > 0: # 购买过的品牌
	# brand_buy.append(score)
	#if current in week[-4:]:
	# lastmonth.append(score)
	# 计算阈值
	#m = sum(hot.values()) / len(hot) # 计算均值 ,lambda的似然估计是 1/m
	#b = (len(brand_buy) + 0.0) / len(brand) # 购买品牌占比
	#if b != 0:
	# bound = m * math.log(1 / b)
	# ranklist = [k for k,v in hot.items() if v >= bound]
	#else: #如果没有购买历史如何推荐
	# return []
	#return ranklist
	#return hot

	def RecForMe(hot):
	    """Select the brands whose heat score clears a per-user threshold.

	    hot -- {brand: [bought (0 or 1), score]}.

	    The threshold is mean(score) * ln(1 / r), where r is the fraction of
	    brands flagged as bought.  Brands scoring at least the threshold are
	    returned, sorted in descending order of their [bought, score] pair, so
	    bought brands come first.

	    Returns [] when `hot` is empty or no brand was bought.
	    """
	    if not hot:
	        return []  # nothing to rank; also avoids dividing by zero below
	    total = float(len(hot))
	    b = sum(v[0] for v in hot.values()) / total
	    if b == 0:
	        return []
	    m = sum(v[1] for v in hot.values()) / total
	    bound = m * math.log(1 / b)
	    reclist = [(k, v) for k, v in hot.items() if v[1] >= bound]
	    reclist.sort(key=lambda x: x[1], reverse=True)
	    return [i[0] for i in reclist]

	def Tianmao(user):
	    """Write the heat-based recommendations of every user to predict.txt.

	    user -- {user: {brand: {week: [click, buy, fav, add]}}}.

	    For each user, Hot() scores the brands and RecForMe() selects the
	    ones to recommend.  Users with a non-empty selection get one line
	    "user<TAB>brand1,brand2,..." in FILEPATH + '/predict.txt', which is
	    overwritten.
	    """
	    # `with` closes the file even if scoring one of the users raises.
	    with open(FILEPATH + '/predict.txt', 'w') as f:
	        for u in user:
	            predict = RecForMe(Hot(user, u))
	            if not predict:
	                continue
	            f.write(u + '\t' + ','.join(predict) + '\n')

	def Tianmao_v1(user):
	    """Write the history-based recommendations of every user to predict.txt.

	    user -- {user: {brand: {week: [click, buy, fav, add]}}}.

	    The counters are converted to scores with Pref(), then
	    Recomend_history() is run per user.  Users with a non-empty result get
	    one line "user<TAB>brand1,brand2,..." in FILEPATH + '/predict.txt',
	    which is overwritten.
	    """
	    # `with` closes the file even if Pref() or a recommendation raises.
	    with open(FILEPATH + '/predict.txt', 'w') as f:
	        pref = Pref(user)
	        for u in user:
	            rec = Recomend_history(pref, u)
	            if rec:
	                f.write(u + '\t' + ','.join(rec) + '\n')

	if __name__ == '__main__':
	    # Script entry point (indentation restored): load the log from the
	    # working directory and write the heat-based predictions.
	    filename = FILEPATH + '/data.txt' # data.txt / train.txt
	    user = Initial(filename)
	    Tianmao(user)

	#if __name__ == '__main__':
	# filename = FILEPATH + '/train.txt'
	# user = Initial(filename)
	# Tianmao_v1(user)