Skip to content

Instantly share code, notes, and snippets.

@tinylamb
Created April 9, 2014 14:10
Show Gist options
  • Save tinylamb/10275027 to your computer and use it in GitHub Desktop.
Save tinylamb/10275027 to your computer and use it in GitHub Desktop.
天猫推荐算法
# coding:utf-8
__author__ = 'tinylamb'
import os
import math
# Working directory; every input/output file in this script is resolved
# relative to it.
FILEPATH = os.getcwd()
# Number of days folded into one time bucket (called a "week" in this file).
PERIOD = 5
# Index of the bucket just past the observed data.  122 is presumably the last
# day index of the data set -- TODO confirm.  Floor division keeps the value an
# integer on both Python 2 and Python 3.
NOW = 122 // PERIOD + 1  # 122
# Midpoint bucket; Pick() only predicts brands whose latest activity is later.
MID = NOW // 2
# NOTE(review): GAP is not referenced by any code visible in this file.
GAP = 0.5


def SCORE(c, b, f, a):
    """Return the preference score of one (user, brand, bucket) cell.

    c, b, f and a are the click, buy, favourite and add-to-cart counts.
    Purchases enter quadratically and scale the log-damped click term;
    favourites and cart additions are added linearly, plus a constant 1.
    """
    return math.log(c + 2) * (pow(b, 2) + 1) + f + a + 1


# Two brands count as similar when their Jaccard similarity exceeds this.
SIMILAR = 0.3
# user : {brand : {week : [0,1,2,3]}}
def Initial(filename):
    """Load the behaviour log and count actions per user, brand and bucket.

    Each non-blank line of *filename* is split on whitespace and read as
    ``userid brandid type time``.  ``type`` is an integer 0..3 used as an
    index into the 4-slot counter list; ``time`` is an integer day that is
    floor-divided by PERIOD to obtain the bucket ("week") index.

    Returns a nested dict ``{userid: {brandid: {week: [n0, n1, n2, n3]}}}``
    where user and brand ids are strings and week is an int.
    """
    user = {}
    # The context manager closes the file even if a malformed line raises.
    with open(filename, 'r') as f:
        for line in f:
            d = line.split()
            if not d:
                continue  # tolerate blank lines (e.g. a trailing newline)
            uid, brand_id = d[0], d[1]
            action = int(d[2])
            # Floor division: the bucket must stay an integer on Python 3 too.
            week = int(d[3]) // PERIOD
            counts = (user.setdefault(uid, {})
                          .setdefault(brand_id, {})
                          .setdefault(week, [0, 0, 0, 0]))
            counts[action] += 1
    return user
# cluster = user : {week : {brand : [0,1,2,3]}}
def Cluster(filename):
    """Score every (user, week, brand) cell and dump the scores to disk.

    Reads *filename* (whitespace-separated ``userid brandid type time``
    lines), counts the four action types per user / week / brand, then
    replaces each counter list with its rounded score and writes one
    ``user<TAB>brand<TAB>score<TAB>week`` line per cell to ``cluster.txt``
    in the current working directory.

    Note that the score used here has no trailing ``+ 1``, unlike the
    module-level SCORE.

    Returns ``{userid: {week: {brandid: score}}}``; week keys are strings.
    """
    cluster = {}
    with open(filename, 'r') as src:
        for line in src:
            d = line.split()
            if not d:
                continue  # tolerate blank lines
            # Week is stored as a string so it can be written out directly.
            week = str(int(d[3]) // PERIOD)
            counts = (cluster.setdefault(d[0], {})
                             .setdefault(week, {})
                             .setdefault(d[1], [0, 0, 0, 0]))
            counts[int(d[2])] += 1

    # Using a context manager guarantees the output is flushed and closed.
    with open(os.getcwd() + '/cluster.txt', 'w') as out:
        for c in cluster:
            brand = cluster[c]
            for w in brand:
                for b in brand[w]:
                    click, buy, fav, add = brand[w][b][:4]
                    score = math.log(click + 2) * (pow(buy, 2) + 1) + fav + add
                    # Overwriting the value of an existing key is safe while
                    # iterating over the dict.
                    brand[w][b] = round(score, 3)
                    out.write(c + '\t' + b + '\t' + str(round(score, 3)) +
                              '\t' + w + '\n')
    return cluster
# pref = user : {brand : {week1 : score, week2 : score}}
def Pref(user):
    """Convert per-bucket action counts into per-bucket preference scores.

    *user* has the shape produced by Initial():
    ``{userid: {brandid: {week: [click, buy, fav, add]}}}``.

    Returns a new dict of the same shape in which every counter list is
    replaced by ``round(SCORE(click, buy, fav, add), 3)``.  The input is left
    untouched: the previous implementation used a shallow ``dict.copy()`` and
    therefore overwrote the caller's nested counters with scores.
    """
    pref = {}
    for u, brands in user.items():
        scored_brands = {}
        for b, weeks in brands.items():
            scored_weeks = {}
            for w, counts in weeks.items():
                click, buy, fav, add = counts[0], counts[1], counts[2], counts[3]
                scored_weeks[w] = round(SCORE(click, buy, fav, add), 3)
            scored_brands[b] = scored_weeks
        pref[u] = scored_brands
    return pref
def Jaccard(b1, b2):
    """Return the Jaccard similarity of the key sets of two mappings.

    The result is ``|keys(b1) & keys(b2)| / |keys(b1) | keys(b2)|`` as a
    float in [0, 1].  When both mappings are empty the union is empty too;
    0.0 is returned in that case instead of raising ZeroDivisionError.
    """
    w1 = set(b1)
    w2 = set(b2)
    union = w1 | w2
    if not union:
        return 0.0
    return len(w1 & w2) / float(len(union))
def Simi(pref, uid):  # similarity between the brands of one user
    """Build the brand-by-brand similarity matrix for user *uid*.

    Every ordered pair of the user's brands (including a brand with itself)
    is compared with Jaccard() on their per-week score dicts.

    Returns ``{brand: {other_brand: similarity}}``.
    """
    weeks_by_brand = pref[uid]
    return dict(
        (row, dict((col, Jaccard(weeks_by_brand[row], weeks_by_brand[col]))
                   for col in weeks_by_brand))
        for row in weeks_by_brand
    )
def Recomend_history(pref, uid):
    """Recommend brands for *uid* by voting over groups of similar brands.

    For each of the user's brands, the brands whose similarity to it exceeds
    SIMILAR (plus the brand itself) form a candidate group; Pick() chooses at
    most one brand from the group and that brand receives one vote.

    Returns the list of brands that collected at least 4 votes.
    """
    brand = pref[uid]
    sim_matrix = Simi(pref, uid)
    votes = {}
    # The former ``done.has_key(b)`` guard was dropped: ``done`` was never
    # filled, so the check could not fire, and ``has_key`` does not exist on
    # Python 3.
    for b in brand:
        similar = [k for k, v in sim_matrix[b].items() if v > SIMILAR]
        # Kept from the original: guarantees b is a candidate whatever the
        # value of SIMILAR.
        similar.append(b)
        p = Pick(pref, uid, similar)
        if p != '':
            votes[p] = votes.get(p, 0) + 1
    # Equivalent to the original ``v/2 >= 2`` for positive integer counts,
    # without depending on Python 2 integer division.
    return [k for k, v in votes.items() if v >= 4]
def Pick(pref, uid, similar):
    """Pick the strongest brand of a candidate group, if it is still active.

    *similar* is an iterable of brand ids belonging to user *uid*.  The brand
    with the highest total score over all weeks is selected (the first one
    wins on ties).  It is returned only when its most recent week is later
    than MID and its score in that week is greater than 1.1; otherwise the
    empty string is returned.
    """
    trends = pref[uid]
    maxscore = 0
    predict = ''
    for s in similar:
        score = sum(trends[s].values())  # trends[s] is {w1: s1, w2: s2, ...}
        if score > maxscore:
            maxscore = score
            predict = s
    if predict == '':
        # No candidate had a positive total; previously this fell through to
        # pref[uid][''] and raised KeyError.
        return ''
    line = trends[predict]
    recent = max(line)
    if recent > MID and line[recent] > 1.1:
        return predict
    return ''
# Compute how "hot" each brand is for one user.
def Hot(user, uid):
    """Score every brand the user *uid* interacted with.

    *user* has the shape produced by Initial():
    ``{userid: {brandid: {week: [click, buy, fav, add]}}}``.

    For each brand the action counts are summed over all weeks and combined
    with the number of active weeks.  The result is divided by a recency
    term: ``(NOW - (0.75 * last_week + 0.25 * first_week)) ** 1.2``, so
    brands whose activity lies further in the past score lower.

    Returns ``{brandid: [bought, score]}`` where ``bought`` is 1 if the user
    ever bought the brand and 0 otherwise.
    """
    brand = user[uid]  # brand : {w1: [0,1,2,3], w2: [0,1,2,3]}
    hot = {}
    for b in brand:
        weeks = brand[b]
        week = sorted(weeks)
        frequence = len(week)  # number of weeks with any activity
        first = week[0]
        last = week[-1]
        click = sum(weeks[w][0] for w in week)
        buy = sum(weeks[w][1] for w in week)
        fav = sum(weeks[w][2] for w in week)
        add = sum(weeks[w][3] for w in week)
        # Weighted "age" of the activity; the last week weighs three times
        # as much as the first one.
        age = NOW - (3 * last / 4.0 + first / 4.0)
        score = (math.log(click + 2) * (pow(1 + buy, 2) + 1)
                 + frequence * (1 + fav + add)) / pow(age, 1.2)
        buyit = 1 if buy != 0 else 0
        # Record [bought or not, score] per brand.  Author's note: the score
        # frequencies follow an exponential distribution.
        hot[b] = [buyit, score]
    return hot
#if buy > 0: # brands that were purchased
# brand_buy.append(score)
#if current in week[-4:]:
# lastmonth.append(score)
# compute the threshold
#m = sum(hot.values()) / len(hot) # compute the mean; the maximum-likelihood estimate of lambda is 1/m
#b = (len(brand_buy) + 0.0) / len(brand) # share of brands that were purchased
#if b != 0:
# bound = m * math.log(1 / b)
# ranklist = [k for k,v in hot.items() if v >= bound]
#else: # how to recommend when there is no purchase history
# return []
#return ranklist
#return hot
def RecForMe(hot):  # {brand: [bought 0/1, score]}
    """Select the brands whose score reaches a purchase-based threshold.

    *hot* maps brand id to ``[bought, score]`` as returned by Hot().  With
    ``m`` the mean score and ``b`` the fraction of brands that were bought,
    the threshold is ``m * log(1 / b)``.

    Returns the ids of the brands with ``score >= threshold``, ordered by
    their ``[bought, score]`` pair in descending order (bought brands first,
    then by score).  Returns an empty list when *hot* is empty or when
    nothing was ever bought.
    """
    if not hot:
        return []  # previously raised ZeroDivisionError
    total = float(len(hot))  # float so the mean never truncates on Python 2
    m = sum(v[1] for v in hot.values()) / total
    b = sum(v[0] for v in hot.values()) / total
    if b == 0:
        return []
    bound = m * math.log(1 / b)
    reclist = [(k, v) for k, v in hot.items() if v[1] >= bound]
    reclist.sort(key=lambda x: x[1], reverse=True)
    return [k for k, _ in reclist]
def Tianmao(user):
    """Write hotness-based recommendations for every user to predict.txt.

    For each user in *user* the brand scores from Hot() are filtered by
    RecForMe(); users with at least one recommended brand get one
    ``userid<TAB>brand1,brand2,...`` line in ``FILEPATH/predict.txt``.
    The file is overwritten on every call.
    """
    # The context manager closes the file even if scoring raises midway.
    with open(FILEPATH + '/predict.txt', 'w') as f:
        for u in user:
            predict = RecForMe(Hot(user, u))
            if not predict:
                continue
            f.write(u + '\t' + ','.join(predict) + '\n')
def Tianmao_v1(user):
    """Write history-based recommendations for every user to predict.txt.

    Converts *user* to preference scores with Pref(), then asks
    Recomend_history() for each user's brands.  Users with at least one
    recommended brand get one ``userid<TAB>brand1,brand2,...`` line in
    ``FILEPATH/predict.txt``.  The file is overwritten on every call.
    """
    # The context manager closes the file even if scoring raises midway.
    with open(FILEPATH + '/predict.txt', 'w') as f:
        pref = Pref(user)
        for u in user:
            rec = Recomend_history(pref, u)
            if rec:
                f.write(u + '\t' + ','.join(rec) + '\n')
if __name__ == '__main__':
    # Script entry point: load the behaviour log from the working directory
    # (data.txt / train.txt) and write the hotness-based predictions.
    Tianmao(Initial(FILEPATH + '/data.txt'))
#if __name__ == '__main__':
# filename = FILEPATH + '/train.txt'
# user = Initial(filename)
# Tianmao_v1(user)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment