Created
April 9, 2014 14:10
-
-
Save tinylamb/10275027 to your computer and use it in GitHub Desktop.
天猫推荐算法
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding:utf-8 | |
__author__ = 'tinylamb' | |
import os | |
import math | |
# Directory the script is launched from; input and output files live here.
FILEPATH = os.getcwd()
# Width of one time bucket (the code calls a bucket a "week").
PERIOD = 5
# One past the bucket index of time value 122 (presumably the last time value
# in the data set -- TODO confirm). Floor division keeps this an int on both
# Python 2 and Python 3.
NOW = 122 // PERIOD + 1
# Bucket index halfway through the observed range.
MID = NOW // 2
GAP = 0.5


def SCORE(c, b, f, a):
    """Return the weighted activity score for one (brand, bucket) cell.

    c, b, f, a are the click, buy, favourite and add-to-cart counts.
    Clicks are log-damped, buys are squared, and the trailing ``+ 1`` keeps
    the score strictly positive even for an all-zero cell.
    """
    return math.log(c + 2) * (pow(b, 2) + 1) + f + a + 1


# Jaccard similarity above which two brands are treated as related.
SIMILAR = 0.3
# user: {brand: {week: [0, 1, 2, 3]}}  -- per-week counters indexed by action type
def Initial(filename, period=None):
    """Load an interaction log into a nested per-user count table.

    Every non-blank line of *filename* must hold at least four
    whitespace-separated fields: ``userid brandid type time``. ``type`` is
    an integer 0..3 used as an index into the counter list, and ``time`` is
    an integer that is bucketed by floor-dividing it by *period*.

    Args:
        filename: path of the log file to read.
        period: bucket width; defaults to the module-level ``PERIOD``.

    Returns:
        ``{userid: {brandid: {bucket: [n0, n1, n2, n3]}}}`` where ``bucket``
        is an int and ``n<i>`` counts the records of type ``i``.
    """
    if period is None:
        period = PERIOD
    user = {}
    # The context manager closes the handle even if a line fails to parse.
    with open(filename, 'r') as f:
        for line in f:
            d = line.split()
            if not d:
                # Blank line (e.g. trailing newline at end of file).
                continue
            # Floor division keeps bucket keys integral on Python 2 and 3.
            week = int(d[3]) // period
            counts = (user.setdefault(d[0], {})
                          .setdefault(d[1], {})
                          .setdefault(week, [0, 0, 0, 0]))
            counts[int(d[2])] += 1
    return user
# cluster: {user: {week: {brand: [0, 1, 2, 3]}}}
def Cluster(filename, period=None):
    """Score every (user, bucket, brand) cell and dump the scores to disk.

    Reads the same ``userid brandid type time`` log format as ``Initial``
    but groups by bucket first and brand second. Each cell's four counters
    (click, buy, fav, add) are then collapsed into one score,
    ``log(click + 2) * (buy**2 + 1) + fav + add`` rounded to 3 decimals.
    Note that this formula has no trailing ``+ 1``, unlike ``SCORE``.

    Side effect: writes ``cluster.txt`` into the current working directory,
    one tab-separated ``user brand score bucket`` line per cell.

    Args:
        filename: path of the log file to read.
        period: bucket width; defaults to the module-level ``PERIOD``.

    Returns:
        ``{userid: {bucket: {brandid: score}}}`` with ``bucket`` as a str.
    """
    if period is None:
        period = PERIOD
    cluster = {}
    with open(filename, 'r') as src:
        for line in src:
            d = line.split()
            if not d:
                continue
            # Floor division, so the bucket label is '2' rather than '2.4'.
            week = str(int(d[3]) // period)
            counts = (cluster.setdefault(d[0], {})
                             .setdefault(week, {})
                             .setdefault(d[1], [0, 0, 0, 0]))
            counts[int(d[2])] += 1

    # The context manager guarantees the output is flushed and closed.
    with open(os.getcwd() + '/cluster.txt', 'w') as out:
        for c in cluster:
            brand = cluster[c]
            for w in brand:
                for b in brand[w]:
                    click, buy, fav, add = brand[w][b][0], brand[w][b][1], brand[w][b][2], brand[w][b][3]
                    score = round(math.log(click + 2) * (pow(buy, 2) + 1) + fav + add, 3)
                    # Replace the raw counters with the score in place.
                    brand[w][b] = score
                    out.write(c + '\t' + b + '\t' + str(score) + '\t' + w + '\n')
    return cluster
# pref: {user: {brand: {week1: score, week2: score}}}
def Pref(user, score=None):
    """Convert raw per-bucket counters into per-bucket preference scores.

    Args:
        user: ``{userid: {brandid: {bucket: [click, buy, fav, add]}}}``.
        score: callable ``(click, buy, fav, add) -> number``; defaults to
            the module-level ``SCORE``.

    Returns:
        A new table ``{userid: {brandid: {bucket: score}}}`` with every
        score rounded to 3 decimals. *user* is left untouched: a shallow
        ``dict.copy()`` would share the inner dicts and overwrite the
        caller's counters.
    """
    if score is None:
        score = SCORE
    pref = {}
    for u, brands in user.items():
        scored_brands = {}
        for b, weeks in brands.items():
            scored_weeks = {}
            for w, counts in weeks.items():
                value = score(counts[0], counts[1], counts[2], counts[3])
                scored_weeks[w] = round(value, 3)
            scored_brands[b] = scored_weeks
        pref[u] = scored_brands
    return pref
def Jaccard(b1, b2):
    """Return the Jaccard index of the key sets of *b1* and *b2*.

    Args:
        b1, b2: mappings (or any iterables of hashable keys); only the keys
            are compared.

    Returns:
        ``|keys1 & keys2| / |keys1 | keys2|`` as a float in [0, 1].
        When both inputs are empty the ratio is undefined, so 0.0 is
        returned instead of raising ZeroDivisionError.
    """
    w1 = set(b1)
    w2 = set(b2)
    union = w1 | w2
    if not union:
        return 0.0
    return float(len(w1 & w2)) / len(union)
def Simi(pref, uid):
    """Build the brand-to-brand similarity matrix for one user.

    Every pair of brands under ``pref[uid]`` (including each brand with
    itself) is compared with ``Jaccard`` on their bucket dictionaries.

    Returns:
        ``{brand_a: {brand_b: similarity}}`` covering all ordered pairs.
    """
    weeks_by_brand = pref[uid]
    return {
        row: {
            col: Jaccard(weeks_by_brand[row], weeks_by_brand[col])
            for col in weeks_by_brand
        }
        for row in weeks_by_brand
    }
def Recomend_history(pref, uid):
    """Recommend brands for *uid* by voting over groups of similar brands.

    For every brand the user touched, the brands whose Jaccard similarity
    exceeds ``SIMILAR`` are gathered (the brand itself is always appended),
    and ``Pick`` selects at most one winner from that group. A brand is
    recommended once it has won at least four groups.

    Args:
        pref: ``{userid: {brandid: {bucket: score}}}`` as built by ``Pref``.
        uid: the user to recommend for.

    Returns:
        List of recommended brand ids (order follows dict iteration).
    """
    brand = pref[uid]
    sim_matrix = Simi(pref, uid)
    votes = {}
    # NOTE: an earlier revision skipped brands already covered by a previous
    # group, but that bookkeeping was disabled; every brand forms a group.
    for b in brand:
        similar = [k for k, v in sim_matrix[b].items() if v > SIMILAR]
        similar.append(b)
        p = Pick(pref, uid, similar)
        if p != '':
            votes[p] = votes.get(p, 0) + 1
    # Same cut-off as the former ``v/2 >= 2`` under integer division.
    return [k for k, v in votes.items() if v >= 4]
def Pick(pref, uid, similar, mid=None, min_score=1.1):
    """Choose the strongest brand of a group if it is still active.

    The candidate with the largest total score across all its buckets wins
    (the first one on ties). It is returned only when its most recent
    bucket lies after *mid* and that bucket's score exceeds *min_score*.

    Args:
        pref: ``{userid: {brandid: {bucket: score}}}``.
        uid: the user whose preferences are inspected.
        similar: iterable of candidate brand ids present in ``pref[uid]``.
        mid: recency cut-off bucket; defaults to the module-level ``MID``.
        min_score: score the latest bucket must exceed.

    Returns:
        The chosen brand id, or ``''`` when nothing qualifies. This includes
        an empty *similar* or candidates whose totals are all non-positive,
        where looking up ``pref[uid]['']`` would raise KeyError.
    """
    if mid is None:
        mid = MID
    best = None
    best_total = 0
    for s in similar:
        total = sum(pref[uid][s].values())
        if total > best_total:
            best_total = total
            best = s
    if best is None:
        return ''
    line = pref[uid][best]
    recent = max(line.keys())
    if recent > mid and line[recent] > min_score:
        return best
    return ''
# Compute the "hotness" of each brand for a user
def Hot(user, uid, now=None):
    """Compute a recency-weighted hotness score for each of a user's brands.

    For every brand the four counters are summed over all buckets, then::

        score = (log(click + 2) * ((1 + buy)**2 + 1)
                 + n_buckets * (1 + fav + add)) / age ** 1.2

    where ``age = now - (0.75 * last_bucket + 0.25 * first_bucket)``, so
    brands touched recently score higher.

    Args:
        user: ``{userid: {brandid: {bucket: [click, buy, fav, add]}}}``.
        uid: the user to score.
        now: reference bucket index; defaults to the module-level ``NOW``.

    Returns:
        ``{brandid: [bought, score]}`` where ``bought`` is 1 if the brand
        was ever bought and 0 otherwise.

    Raises:
        ValueError: if a brand's activity is not strictly before *now*,
            which would make the decay base non-positive.
    """
    if now is None:
        now = NOW
    hot = {}
    for b, weekly in user[uid].items():
        week = sorted(weekly)
        frequence = len(week)  # number of buckets with any activity
        first = week[0]
        last = week[-1]
        click = sum(weekly[w][0] for w in week)
        buy = sum(weekly[w][1] for w in week)
        fav = sum(weekly[w][2] for w in week)
        add = sum(weekly[w][3] for w in week)
        # The last bucket carries three times the weight of the first.
        age = now - (3 * last / 4.0 + first / 4.0)
        if age <= 0:
            raise ValueError(
                'brand %r of user %r has activity at or after bucket %r'
                % (b, uid, now))
        score = (math.log(click + 2) * (pow((1 + buy), 2) + 1)
                 + frequence * (1 + fav + add)) / pow(age, 1.2)
        buyit = 1 if buy != 0 else 0
        # Record [ever bought, score] for each brand.
        hot[b] = [buyit, score]
    return hot
#if buy > 0: # 购买过的品牌 | |
# brand_buy.append(score) | |
#if current in week[-4:]: | |
# lastmonth.append(score) | |
# 计算阈值 | |
#m = sum(hot.values()) / len(hot) # 计算均值 ,lambda的似然估计是 1/m | |
#b = (len(brand_buy) + 0.0) / len(brand) # 购买品牌占比 | |
#if b != 0: | |
# bound = m * math.log(1 / b) | |
# ranklist = [k for k,v in hot.items() if v >= bound] | |
#else: #如果没有购买历史如何推荐 | |
# return [] | |
#return ranklist | |
#return hot | |
def RecForMe(hot):
    """Select the brands whose hotness clears a per-user threshold.

    With ``m`` the mean score and ``b`` the fraction of brands ever bought,
    the threshold is ``m * log(1 / b)``. Brands scoring at least that much
    are returned, sorted descending by their ``[bought, score]`` pair, so
    bought brands come first and ties are broken by score.

    Args:
        hot: ``{brandid: [bought (0/1), score]}`` as produced by ``Hot``.

    Returns:
        List of brand ids, or ``[]`` when *hot* is empty or the user never
        bought anything (no basis for a threshold).
    """
    if not hot:
        return []
    total = float(len(hot))  # float so the mean never truncates
    m = sum(v[1] for v in hot.values()) / total
    b = sum(v[0] for v in hot.values()) / total
    if b == 0:
        return []
    bound = m * math.log(1 / b)
    reclist = [(k, v) for k, v in hot.items() if v[1] >= bound]
    reclist.sort(key=lambda x: x[1], reverse=True)
    return [k for k, _ in reclist]
def Tianmao(user):
    """Write hotness-based recommendations for every user to predict.txt.

    For each user the brands are scored with ``Hot`` and filtered with
    ``RecForMe``. Users with no recommendation are skipped. Output lines
    have the form ``userid<TAB>brand1,brand2,...`` and go to
    ``FILEPATH + '/predict.txt'``, which is overwritten.

    Args:
        user: ``{userid: {brandid: {bucket: [click, buy, fav, add]}}}``.
    """
    # The context manager closes the file even if scoring a user raises.
    with open(FILEPATH + '/predict.txt', 'w') as f:
        for u in user:
            predict = RecForMe(Hot(user, u))
            if not predict:
                continue
            f.write(u + '\t' + ','.join(predict) + '\n')
def Tianmao_v1(user):
    """Write history-similarity recommendations for every user to predict.txt.

    Counters are converted to scores with ``Pref`` and each user is passed
    through ``Recomend_history``. Users with no recommendation are skipped.
    Output lines have the form ``userid<TAB>brand1,brand2,...`` and go to
    ``FILEPATH + '/predict.txt'``, which is overwritten.

    Args:
        user: ``{userid: {brandid: {bucket: [click, buy, fav, add]}}}``.
    """
    # The context manager closes the file even if a recommendation raises.
    with open(FILEPATH + '/predict.txt', 'w') as f:
        pref = Pref(user)
        for u in user:
            rec = Recomend_history(pref, u)
            if rec:
                f.write(u + '\t' + ','.join(rec) + '\n')
if __name__ == '__main__':
    # Script entry point: load the log from the working directory and emit
    # hotness-based predictions. Use '/train.txt' instead of '/data.txt' to
    # run against the training file.
    data_path = FILEPATH + '/data.txt'
    history = Initial(data_path)
    Tianmao(history)
#if __name__ == '__main__': | |
# filename = FILEPATH + '/train.txt' | |
# user = Initial(filename) | |
# Tianmao_v1(user) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment