Last active
May 14, 2019 03:50
-
-
Save BarclayII/4456ad2ab2b6bb23350af5bbe25e2104 to your computer and use it in GitHub Desktop.
Compute significant PPR for all nodes with Reverse Push
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scipy.sparse as ssp | |
import numpy as np | |
from sklearn.preprocessing import normalize | |
from numba.typed import Dict | |
from numba import jit, types, njit, prange | |
@njit(parallel=True, nogil=True) | |
def reverse_push_bipartite(A_indptr, A_indices, A_data, nL, nR, r_max, alpha): | |
''' | |
Compute significant PPRs based on 2-hop random walks with restart on the | |
right side. | |
A_indptr: CSC indptr | |
A_indices: CSC indices | |
A_data: CSC data | |
nL: number of nodes on left side | |
nR: number of nodes on right side | |
r_max: residual, a small positive real number | |
alpha: restart probability | |
Return: | |
List[Dict[int, float]] | |
For nL <= v < nL + nR, if PPR(u, v) > r_max, then it would show up as | |
PPR(u, v) = result[v - nL][u] | |
with an error of o(r_max) | |
''' | |
result = [Dict.empty(key_type=types.int64, value_type=types.float64) | |
for _ in range(nL, nL + nR)] | |
for t in prange(nL, nL + nR): | |
r = Dict.empty(key_type=types.int64, value_type=types.float64) | |
p = Dict.empty(key_type=types.int64, value_type=types.float64) | |
r[t] = 1. | |
while True: | |
for v, r_v in r.items(): | |
if r_v > r_max: | |
break | |
else: | |
break | |
A_v_indices = A_indices[A_indptr[v]:A_indptr[v+1]] | |
A_v_data = A_data[A_indptr[v]:A_indptr[v+1]] | |
a = alpha if v >= nL else 0 | |
for i, d in zip(A_v_indices, A_v_data): | |
r[i] = r.get(i, 0) + (1 - a) * d * r_v | |
if a > 0: | |
p[v] = p.get(v, 0) + a * r_v | |
del r[v] | |
result[t - nL] = p | |
print(t) | |
return result | |
if __name__ == '__main__': | |
# 0, 1, 2, 3 are on the left | |
# 4, 5, 6, 7 are on the right | |
# We want to compute all PPR(u, v) > 0.01 with restart probability 0.2, | |
# based on 2-hop random walks on the right side. | |
A = ssp.coo_matrix( | |
(np.ones(9), | |
# source nodes destination nodes | |
([0, 0, 1, 2, 3, 4, 5, 6, 7], [5, 6, 6, 7, 6, 0, 1, 2, 3]))) | |
A = normalize(A, norm='l1', axis=1).tocsc() | |
p = reverse_push_bipartite( | |
A.indptr, A.indices, A.data, 4, 4, 0.01, 0.2) | |
print('Converting to dict') | |
p = [dict(d) for d in p] | |
for i, d in enumerate(p): | |
for u, v in d.items(): | |
print('PPR(%d, %d) = %f' % (u, 4 + i, v)) | |
# Result: | |
# PPR(4, 4) = 0.200000 | |
# PPR(5, 5) = 0.200000 | |
# PPR(4, 5) = 0.080000 | |
# PPR(6, 6) = 0.551456 | |
# PPR(4, 6) = 0.390993 | |
# PPR(5, 6) = 0.439320 | |
# PPR(7, 6) = 0.439320 | |
# PPR(7, 7) = 0.551456 | |
# PPR(6, 7) = 0.439320 | |
# PPR(4, 7) = 0.310993 | |
# PPR(5, 7) = 0.351456 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment