Skip to content

Instantly share code, notes, and snippets.

@BarclayII
Last active May 14, 2019 03:50
Show Gist options
  • Save BarclayII/4456ad2ab2b6bb23350af5bbe25e2104 to your computer and use it in GitHub Desktop.
Save BarclayII/4456ad2ab2b6bb23350af5bbe25e2104 to your computer and use it in GitHub Desktop.
Compute significant PPR for all nodes with Reverse Push
import scipy.sparse as ssp
import numpy as np
from sklearn.preprocessing import normalize
from numba.typed import Dict
from numba import jit, types, njit, prange
@njit(parallel=True, nogil=True)
def reverse_push_bipartite(A_indptr, A_indices, A_data, nL, nR, r_max, alpha):
'''
Compute significant PPRs based on 2-hop random walks with restart on the
right side.
A_indptr: CSC indptr
A_indices: CSC indices
A_data: CSC data
nL: number of nodes on left side
nR: number of nodes on right side
r_max: residual, a small positive real number
alpha: restart probability
Return:
List[Dict[int, float]]
For nL <= v < nL + nR, if PPR(u, v) > r_max, then it would show up as
PPR(u, v) = result[v - nL][u]
with an error of o(r_max)
'''
result = [Dict.empty(key_type=types.int64, value_type=types.float64)
for _ in range(nL, nL + nR)]
for t in prange(nL, nL + nR):
r = Dict.empty(key_type=types.int64, value_type=types.float64)
p = Dict.empty(key_type=types.int64, value_type=types.float64)
r[t] = 1.
while True:
for v, r_v in r.items():
if r_v > r_max:
break
else:
break
A_v_indices = A_indices[A_indptr[v]:A_indptr[v+1]]
A_v_data = A_data[A_indptr[v]:A_indptr[v+1]]
a = alpha if v >= nL else 0
for i, d in zip(A_v_indices, A_v_data):
r[i] = r.get(i, 0) + (1 - a) * d * r_v
if a > 0:
p[v] = p.get(v, 0) + a * r_v
del r[v]
result[t - nL] = p
print(t)
return result
if __name__ == '__main__':
# 0, 1, 2, 3 are on the left
# 4, 5, 6, 7 are on the right
# We want to compute all PPR(u, v) > 0.01 with restart probability 0.2,
# based on 2-hop random walks on the right side.
A = ssp.coo_matrix(
(np.ones(9),
# source nodes destination nodes
([0, 0, 1, 2, 3, 4, 5, 6, 7], [5, 6, 6, 7, 6, 0, 1, 2, 3])))
A = normalize(A, norm='l1', axis=1).tocsc()
p = reverse_push_bipartite(
A.indptr, A.indices, A.data, 4, 4, 0.01, 0.2)
print('Converting to dict')
p = [dict(d) for d in p]
for i, d in enumerate(p):
for u, v in d.items():
print('PPR(%d, %d) = %f' % (u, 4 + i, v))
# Result:
# PPR(4, 4) = 0.200000
# PPR(5, 5) = 0.200000
# PPR(4, 5) = 0.080000
# PPR(6, 6) = 0.551456
# PPR(4, 6) = 0.390993
# PPR(5, 6) = 0.439320
# PPR(7, 6) = 0.439320
# PPR(7, 7) = 0.551456
# PPR(6, 7) = 0.439320
# PPR(4, 7) = 0.310993
# PPR(5, 7) = 0.351456
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment