Last active
August 27, 2017 08:28
-
-
Save bowbowbow/902595e93dc7484344d2c5c9f51b5248 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from decimal import * | |
import random | |
import time | |
getcontext().prec = 100 | |
# convert to big decimal | |
def lf(num): | |
return Decimal(num) | |
class Pintrust: | |
# parameter | |
target_user = 5446 # 5446 | |
alpha = lf(1e-2) | |
beta = lf(1e-5) | |
eps = lf(0.005) | |
RF = lf(0.1) | |
user_num = 233432 #233432 | |
iteration = 1 | |
topk = 200 | |
# belief score | |
prior_belief_score = [] | |
final_belief_score = [] | |
# file path | |
input_file_path = "./user_user_matrix.txt" | |
output_file_path = "./target_"+str(target_user)+"_output.txt" | |
# timer | |
start_time = 0 | |
# input data | |
trust = {} | |
distrust = {} | |
rating = {} | |
# graph | |
adj = {} | |
cache_FR = {} | |
# for testing | |
cut = {} | |
alive = {} | |
edge_type = { | |
'positive': 0, | |
'positive_reverse': 1, | |
'distrust': 2, | |
'distrust_reverse': 3 | |
} | |
def __init__(self): | |
self.start_time = time.time() | |
self.init_belief_score() | |
# set dictionary default value to list | |
self.adj = defaultdict(lambda: []) | |
self.rating = defaultdict(lambda: []) | |
def check_time(self, title): | |
print("[" + str(time.time() - self.start_time) + " seconds] [" + title + "]") | |
def add_trust(self, u, v): | |
self.trust[(u, v)] = 1 | |
def add_distrust(self, u, v): | |
self.distrust[(u, v)] = 1 | |
def add_rating(self, u, v, r): | |
self.rating[(u, v)].append(r) | |
def init_belief_score(self): | |
# init belief_score | |
self.prior_belief_score = [[lf(1.0), lf(1.0)]] * self.user_num | |
self.final_belief_score = [[lf(1.0), lf(1.0)]] * self.user_num | |
for i in range(0, self.user_num): | |
self.prior_belief_score[i] = [lf(0.5) + self.beta, lf(0.5) - self.beta] | |
self.prior_belief_score[self.target_user] = [lf(0.5) + self.alpha, lf(0.5) - self.alpha] | |
def make_trust_edge(self): | |
tmp_adj = {} | |
for key, value in self.trust.items(): | |
u = key[0] | |
v = key[1] | |
tmp_adj[(u, v)] = 1 | |
for key, value in self.rating.items(): | |
u = key[0] | |
v = key[1] | |
tmp_adj[(u, v)] = 1 | |
for key, value in tmp_adj.items(): | |
u = key[0] | |
v = key[1] | |
self.adj[u].append((v, self.edge_type['positive'])) | |
# reverse edge | |
self.adj[v].append((u, self.edge_type['positive_reverse'])) | |
def make_distrust_edge(self): | |
for key, value in self.distrust.items(): | |
u = key[0] | |
v = key[1] | |
self.adj[u].append((v, self.edge_type['distrust'])) | |
# reverse edge | |
self.adj[v].append((u, self.edge_type['distrust_reverse'])) | |
def get_cache_FR(self, u, v): | |
if (u,v) in self.cache_FR: | |
return self.cache_FR[(u, v)] | |
else: | |
FR = self.get_FR(u, v) | |
self.cache_FR[(u, v)] = FR | |
return FR | |
def get_FR(self, u, v): | |
rating_cnt = 0 | |
rating_sum = 0 | |
rating_aver = 0 | |
if (u, v) in self.rating: | |
rating_cnt = len(self.rating[(u, v)]) | |
rating_list = self.rating[(u, v)] | |
for r in range(len(rating_list)): | |
rating_sum += r | |
rating_aver = rating_sum / (rating_cnt + 1e-9) | |
if rating_cnt == 0: | |
return 0 | |
if rating_cnt >= 64: | |
if rating_aver <= 3: return 0 | |
if rating_cnt <= 127: return 0.75 | |
if rating_cnt <= 255: return 0.80 | |
if rating_cnt <= 511: return 0.85 | |
return 0.85 | |
if rating_cnt == 1: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.05 | |
if rating_cnt <= 3: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.1 | |
if rating_cnt <= 7: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.3 | |
if rating_cnt <= 15: | |
if rating_aver <= 3: | |
return 0.02 | |
else: | |
return 0.4 | |
if rating_cnt <= 31: | |
if rating_aver <= 3: | |
return 0.03 | |
else: | |
return 0.55 | |
if rating_cnt <= 63: | |
if rating_aver <= 3: | |
return 0.02 | |
else: | |
return 0.7 | |
return 0 | |
def get_propagation_matrix(self, u, v, edge_type): | |
mat = [[0.0, 0.0], [0.0, 0.0]] | |
trust_relation = lf(0) | |
if (u, v) in self.trust: | |
trust_relation = lf(1) | |
if edge_type == self.edge_type['positive']: | |
mc = self.eps * (trust_relation + lf(self.get_cache_FR(u, v))) | |
mat[0][0] = mat[1][1] =lf(0.5) + mc | |
mat[1][0] = mat[0][1] =lf(0.5) - mc | |
elif edge_type == self.edge_type['positive_reverse']: | |
mc = self.eps * (trust_relation + lf(self.get_cache_FR(u, v))) * self.RF | |
mat[0][0] = mat[1][1] = lf(0.5) + mc | |
mat[1][0] = mat[0][1] = lf(0.5) - mc | |
elif edge_type == self.edge_type['distrust']: | |
mc = self.eps | |
mat[0][0] = mat[1][1] = lf(0.5) - mc | |
mat[1][0] = mat[0][1] = lf(0.5) + mc | |
elif edge_type == self.edge_type['distrust_reverse']: | |
mc = self.eps * self.RF | |
mat[0][0] = mat[1][1] = lf(0.5) - mc | |
mat[1][0] = mat[0][1] = lf(0.5) + mc | |
return mat | |
def propagate_msg(self, u, v, received_msg, edge_type): | |
msg = [lf(1.0), lf(1.0)] | |
psi = self.get_propagation_matrix(u, v, edge_type) | |
for p in range(0, 2): | |
sum = 0 | |
for q in range(0, 2): | |
sum += self.prior_belief_score[u][q] * psi[q][p] * received_msg[q] | |
msg[p] = sum | |
return msg | |
def normalize_msg(self, msg): | |
sum = lf(0.0) | |
for i in range(0, 2): | |
sum += msg[i] | |
for i in range(0, 2): | |
msg[i] = msg[i] / sum | |
return msg | |
def run_belief_propagation(self): | |
self.make_trust_edge() | |
self.make_distrust_edge() | |
MSG = defaultdict(lambda: [lf(1.0), lf(1.0)]) | |
for ite in range(0, self.iteration): | |
for u in range(0, self.user_num): | |
if u % 1000 == 0: | |
self.check_time("now user u : " + str(u)) | |
msg = [lf(1.0), lf(1.0)] | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
MSG_vu = MSG[(v, u)] | |
msg[0] *= MSG_vu[0] | |
msg[1] *= MSG_vu[1] | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
type = edge[1] | |
received_msg = [lf(1.0), lf(1.0)] | |
for p in range(0, 2): | |
received_msg[p] = msg[p] / MSG[(v, u)][p] | |
MSG_uv = self.propagate_msg(u, v, received_msg, type) | |
MSG[(u, v)] = self.normalize_msg(MSG_uv) | |
self.check_time('iteration ' + str(ite) + ' finish') | |
for u in range(0, self.user_num): | |
msg = [lf(1.0), lf(1.0)] | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
MSG_vu = MSG[(v, u)] | |
msg[0] *= MSG_vu[0] | |
msg[1] *= MSG_vu[1] | |
for p in range(0, 2): | |
msg[p] = msg[p] * self.prior_belief_score[u][p] | |
msg = self.normalize_msg(msg) | |
self.final_belief_score[u] = msg | |
def run(self): | |
self.check_time('start run pintrust') | |
self.read_input() | |
self.check_time('read_input finish') | |
self.run_belief_propagation() | |
self.check_time('run_belief_propagation finish') | |
output = [] | |
for i in range(0, len(self.final_belief_score)): | |
trust = self.final_belief_score[i][0] | |
distrust = self.final_belief_score[i][1] | |
output.append((i, trust, distrust)) | |
output.sort(key=lambda x: x[1], reverse=True) | |
self.write_output(output) | |
self.check_time('write_output finish') | |
def read_input(self): | |
f = open(self.input_file_path, "r") | |
while True: | |
line = f.readline() | |
if not line: break | |
p = line.split() | |
u = int(p[0]) | |
v = int(p[1]) | |
t = int(p[2]) | |
r = int(p[3]) | |
if t == 0: | |
pintrust.add_rating(u, v, r) | |
elif t == 1: | |
if u == self.target_user: | |
# random cut 25% | |
if random.randrange(1,4) == 1: | |
self.cut[v]=1 | |
continue | |
else: | |
self.alive[v]=1 | |
pintrust.add_trust(u, v) | |
elif t == -1: | |
pintrust.add_distrust(u, v) | |
f.close() | |
def write_output(self, output): | |
f = open(self.output_file_path, "w") | |
for i in range(0, min(len(output), self.topk)): | |
u = str(output[i][0]) | |
trust = str(output[i][1]) | |
distrust = str(output[i][2]) | |
f.write(u + " (" + trust + "," + distrust + ") \n") | |
f.close() | |
def check_output(self): | |
f = open(self.output_file_path, "r") | |
hit_cut = 0 | |
hit_alive = 0 | |
while True: | |
line = f.readline() | |
if not line: break | |
p = line.split() | |
v = int(p[0]) | |
if v in self.cut: | |
hit_cut += 1 | |
if v in self.alive: | |
hit_alive += 1 | |
print('cut size :' + str(len(self.cut))) | |
print('hit_cut :' + str(hit_cut)) | |
print('alive size:' + str(len(self.alive))) | |
print('hit_alive :' + str(hit_alive)) | |
f.close() | |
pintrust = Pintrust() | |
pintrust.run() | |
pintrust.check_output() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment