Created
August 31, 2017 05:01
-
-
Save bowbowbow/2020c58c431f92d0cc5daf512795c8b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from decimal import * | |
import random | |
import time | |
#getcontext().prec = 1000 | |
# convert to big decimal | |
def lf(num): | |
return Decimal(num) | |
class Pintrust: | |
# parameter | |
target_user = 131124 # 131124 | |
alpha = lf(1e-2) | |
beta = lf(1e-5) | |
eps = lf(0.001) | |
RF = lf(0.1) | |
user_num = 131828 # 131828 | |
iteration = 10 | |
topk = 500 | |
# belief score | |
prior_belief_score = [] | |
final_belief_score = [] | |
# file path | |
input_file_path = "./realinfo.txt" | |
output_file_path = "./target_"+str(target_user)+"_output.txt" | |
# timer | |
start_time = 0 | |
# input data | |
trust = {} | |
distrust = {} | |
rating = {} | |
# graph | |
adj = {} | |
cache_FR = {} | |
# for testing | |
cut = {} | |
alive = {} | |
edge_type = { | |
'positive': 0, | |
'positive_reverse': 1, | |
'distrust': 2, | |
'distrust_reverse': 3 | |
} | |
def __init__(self): | |
self.start_time = time.time() | |
self.init_belief_score() | |
# set dictionary default value to list | |
self.adj = defaultdict(lambda: []) | |
self.rating = defaultdict(lambda: []) | |
def check_time(self, title): | |
print("[" + str(time.time() - self.start_time) + " seconds] [" + title + "]") | |
def add_trust(self, u, v): | |
self.trust[(u, v)] = 1 | |
def add_distrust(self, u, v): | |
self.distrust[(u, v)] = 1 | |
def add_rating(self, u, v, r): | |
self.rating[(u, v)].append(r) | |
def init_belief_score(self): | |
# init belief_score | |
self.prior_belief_score = [[lf(1.0), lf(1.0)]] * self.user_num | |
self.final_belief_score = [[lf(1.0), lf(1.0)]] * self.user_num | |
for i in range(0, self.user_num): | |
self.prior_belief_score[i] = [lf(0.5) + self.beta, lf(0.5) - self.beta] | |
self.prior_belief_score[self.target_user] = [lf(0.5) + self.alpha, lf(0.5) - self.alpha] | |
def make_trust_edge(self): | |
tmp_adj = {} | |
for key, value in self.trust.items(): | |
u = key[0] | |
v = key[1] | |
tmp_adj[(u, v)] = 1 | |
for key, value in self.rating.items(): | |
u = key[0] | |
v = key[1] | |
tmp_adj[(u, v)] = 1 | |
for key, value in tmp_adj.items(): | |
u = key[0] | |
v = key[1] | |
self.adj[u].append((v, self.edge_type['positive'])) | |
# reverse edge | |
self.adj[v].append((u, self.edge_type['positive_reverse'])) | |
def make_distrust_edge(self): | |
for key, value in self.distrust.items(): | |
u = key[0] | |
v = key[1] | |
self.adj[u].append((v, self.edge_type['distrust'])) | |
# reverse edge | |
self.adj[v].append((u, self.edge_type['distrust_reverse'])) | |
def get_cache_FR(self, u, v): | |
if (u,v) in self.cache_FR: | |
return self.cache_FR[(u, v)] | |
else: | |
FR = self.get_FR(u, v) | |
self.cache_FR[(u, v)] = FR | |
return FR | |
def get_FR(self, u, v): | |
rating_cnt = 0 | |
rating_sum = 0 | |
rating_aver = 0 | |
if (u, v) in self.rating: | |
rating_cnt = len(self.rating[(u, v)]) | |
rating_list = self.rating[(u, v)] | |
for r in range(len(rating_list)): | |
rating_sum += r | |
rating_aver = rating_sum / (rating_cnt + 1e-9) | |
if rating_cnt == 0: | |
return 0 | |
if rating_cnt >= 64: | |
if rating_aver <= 3: return 0 | |
if rating_cnt <= 127: return 0.75 | |
if rating_cnt <= 255: return 0.80 | |
if rating_cnt <= 511: return 0.85 | |
return 0.85 | |
if rating_cnt == 1: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.05 | |
if rating_cnt <= 3: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.1 | |
if rating_cnt <= 7: | |
if rating_aver <= 3: | |
return 0.01 | |
else: | |
return 0.3 | |
if rating_cnt <= 15: | |
if rating_aver <= 3: | |
return 0.02 | |
else: | |
return 0.4 | |
if rating_cnt <= 31: | |
if rating_aver <= 3: | |
return 0.03 | |
else: | |
return 0.55 | |
if rating_cnt <= 63: | |
if rating_aver <= 3: | |
return 0.02 | |
else: | |
return 0.7 | |
return 0 | |
def get_propagation_matrix(self, u, v, edge_type): | |
mat = [[0.0, 0.0], [0.0, 0.0]] | |
trust_relation = lf(0) | |
if (u, v) in self.trust: | |
trust_relation = lf(1) | |
if edge_type == self.edge_type['positive']: | |
mc = self.eps * (trust_relation + lf(self.get_cache_FR(u, v))) | |
mat[0][0] = mat[1][1] =lf(0.5) + mc | |
mat[1][0] = mat[0][1] =lf(0.5) - mc | |
elif edge_type == self.edge_type['positive_reverse']: | |
mc = self.eps * (trust_relation + lf(self.get_cache_FR(u, v))) * self.RF | |
mat[0][0] = mat[1][1] = lf(0.5) + mc | |
mat[1][0] = mat[0][1] = lf(0.5) - mc | |
elif edge_type == self.edge_type['distrust']: | |
mc = self.eps | |
mat[0][0] = mat[1][1] = lf(0.5) - mc | |
mat[1][0] = mat[0][1] = lf(0.5) + mc | |
elif edge_type == self.edge_type['distrust_reverse']: | |
mc = self.eps * self.RF | |
mat[0][0] = mat[1][1] = lf(0.5) - mc | |
mat[1][0] = mat[0][1] = lf(0.5) + mc | |
return mat | |
def propagate_msg(self, u, v, received_msg, edge_type): | |
msg = [lf(1.0), lf(1.0)] | |
psi = self.get_propagation_matrix(u, v, edge_type) | |
for p in range(0, 2): | |
sum = 0 | |
for q in range(0, 2): | |
sum += self.prior_belief_score[u][q] * psi[q][p] * received_msg[q] | |
msg[p] = sum | |
return msg | |
def normalize_msg(self, msg): | |
sum = lf(0.0) | |
for i in range(0, 2): | |
sum += msg[i] | |
for i in range(0, 2): | |
msg[i] = msg[i] / sum | |
return msg | |
def run_belief_propagation(self): | |
self.make_trust_edge() | |
self.make_distrust_edge() | |
MSG = defaultdict(lambda: [lf(1.0), lf(1.0)]) | |
for ite in range(0, self.iteration): | |
for u in range(0, self.user_num): | |
if u % 1000 == 0: | |
self.check_time("now user u : " + str(u)) | |
msg = [lf(1.0), lf(1.0)] | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
MSG_vu = MSG[(v, u)] | |
msg[0] *= MSG_vu[0] | |
msg[1] *= MSG_vu[1] | |
msg = self.normalize_msg(msg) | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
type = edge[1] | |
received_msg = [lf(1.0), lf(1.0)] | |
for p in range(0, 2): | |
received_msg[p] = msg[p] / MSG[(v, u)][p] | |
msg = self.normalize_msg(msg) | |
MSG_uv = self.propagate_msg(u, v, received_msg, type) | |
MSG[(u, v)] = self.normalize_msg(MSG_uv) | |
self.check_time('iteration ' + str(ite) + ' finish') | |
for u in range(0, self.user_num): | |
msg = [lf(1.0), lf(1.0)] | |
for i in range(0, len(self.adj[u])): | |
edge = self.adj[u][i] | |
v = edge[0] | |
MSG_vu = MSG[(v, u)] | |
msg[0] *= MSG_vu[0] | |
msg[1] *= MSG_vu[1] | |
msg = self.normalize_msg(msg) | |
for p in range(0, 2): | |
msg[p] = msg[p] * self.prior_belief_score[u][p] | |
msg = self.normalize_msg(msg) | |
self.final_belief_score[u] = msg | |
def run(self): | |
self.check_time('start run pintrust') | |
self.read_input() | |
self.check_time('read_input finish') | |
self.run_belief_propagation() | |
self.check_time('run_belief_propagation finish') | |
output = [] | |
for i in range(0, len(self.final_belief_score)): | |
trust = self.final_belief_score[i][0] | |
distrust = self.final_belief_score[i][1] | |
output.append((i, trust, distrust)) | |
output.sort(key=lambda x: x[1], reverse=True) | |
self.write_output(output) | |
self.check_time('write_output finish') | |
def read_input(self): | |
f = open(self.input_file_path, "r") | |
while True: | |
line = f.readline() | |
if not line: break | |
p = line.split() | |
u = int(p[0]) | |
v = int(p[1]) | |
t = int(p[2]) | |
r = int(p[3]) | |
if t == 0: | |
pintrust.add_rating(u, v, r) | |
elif t == 1: | |
if u == self.target_user: | |
# random cut 25% | |
if random.randrange(1,4) == 1: | |
self.cut[v]=1 | |
continue | |
else: | |
self.alive[v]=1 | |
pintrust.add_trust(u, v) | |
elif t == -1: | |
pintrust.add_distrust(u, v) | |
f.close() | |
def write_output(self, output): | |
f = open(self.output_file_path, "w") | |
hit_cut = 0 | |
hit_alive = 0 | |
for i in range(0, len(output)): | |
v = output[i][0] | |
trust = str(output[i][1]) | |
distrust = str(output[i][2]) | |
if v in self.cut: | |
hit_cut += 1 | |
if v in self.alive: | |
hit_alive += 1 | |
if i > 0 and (i==10 or i==100 or i==1000 or i==1000): | |
f.write('----------topk---------- :'+str(i)) | |
f.write('cut size :' + str(len(self.cut))) | |
f.write('hit_cut :' + str(hit_cut)) | |
f.write('alive size:' + str(len(self.alive))) | |
f.write('hit_alive :' + str(hit_alive)) | |
f.write(str(v) + " (" + trust + "," + distrust + ") \n") | |
f.close() | |
def check_output(self): | |
f = open(self.output_file_path, "r") | |
hit_cut = 0 | |
hit_alive = 0 | |
while True: | |
line = f.readline() | |
if not line: break | |
p = line.split() | |
v = int(p[0]) | |
if v in self.cut: | |
hit_cut += 1 | |
if v in self.alive: | |
hit_alive += 1 | |
print('cut size :' + str(len(self.cut))) | |
print('hit_cut :' + str(hit_cut)) | |
print('alive size:' + str(len(self.alive))) | |
print('hit_alive :' + str(hit_alive)) | |
f.close() | |
pintrust = Pintrust() | |
pintrust.run() | |
#pintrust.check_output() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment