cycyyy · November 28, 2021 04:51
diff --git a/aaa.py b/aaa.py
 import numpy as np 
 from sklearn.neighbors import NearestNeighbors
 from scipy import spatial
 import math
 from multiprocessing.pool import Pool
 import multiprocessing

 def main(X, Y, k, alpha):
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
    tree = spatial.KDTree(Y)
    distances, indices = nbrs.kneighbors(X)
    N = X.shape[0]
    M = Y.shape[0]
    res = 0.0
    b = B(k, alpha)
    #def get_upsilon(x):
    #    return tree.query(x, k)[0][-1]
    #pool = Pool(processes=40)
    #upsilons = pool.map(get_upsilon, X)
    #print(len(upsilons))
    for i in range(0, N):
        rho = distances[i][-1]
        upsilon = tree.query(X[i], k)[0][-1]
        if rho == 0 and upsilon == 0:
            res += 1
            continue
        print(i)
        res += first(M, N, rho, upsilon, alpha) * b
    return res / N

 def first(M, N, rho, upsilon, alpha):
    return math.pow(((N - 1) * rho / (M * upsilon)), 1 / alpha)

 import scipy.special as sc
 def B(k, alpha):
    return  gammadiv(sc.gammaln(k), sc.gammaln(k -alpha + 1)) * gammadiv(sc.gammaln(k), sc.gammaln(k + alpha - 1))

 def gammadiv(a, b):
    return math.exp(a - b)

 import gc
 import sys
 import os
 def mutli(k, alpha, start_index, end_index, N):
    try:
        sys.setrecursionlimit(1000000)
        X = np.load(str(start_index) + "x.npy")
        Y = np.load("y.npy")
        distances = np.load("dis.npy")
 #    nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
        tree = spatial.KDTree(Y)
 #    distances, indices = nbrs.kneighbors(X)
        M = Y.shape[0]
 #    N = X.shape[0]
        res = 0.0
        b = B(k, alpha)
        for i in range(0, end_index - start_index):
 #            if i % 10 == 0:
 #                print(i, end_index - start_index)
            rho = distances[i][-1]
            upsilon = tree.query(X[i], k)[0][-1]
            if rho == 0 and upsilon == 0:
                res += 1
                continue
            t = k
            while upsilon == 0:
                t *= 2
            upsilon = tree.query(X[i], t)[0][-1]
            res += first(M, N, rho, upsilon, alpha) * b
    except:
        print("waht?")
 #    print("finish")
    return res

 import faiss
 def mutli_f(k, alpha, start_index, end_index, N):
    try:
        sys.setrecursionlimit(1000000)
        X = np.load(str(start_index) + "x.npy").astype('float32')
        Y = np.load("y.npy").astype('float32')
        distances = np.load("dis.npy").astype('float32')
        tree = faiss.IndexFlatL2(Y.shape[1])
        tree.add(Y)
        M = Y.shape[0]
        res = 0.0
        b = B(k, alpha)
        for i in range(0, end_index - start_index):
 #            if i % 10 == 0:
 #                print(i, end_index - start_index)
            rho = distances[i][-1]
            upsilon = tree.search(X[i], k)[0][-1]
            if rho == 0 and upsilon == 0:
                res += 1
                continue
            t = k
            while upsilon == 0:
                t *= 2
            upsilon = tree.query(X[i], t)[0][-1]
            res += first(M, N, rho, upsilon, alpha) * b
    except Exception as e:
        print("waht?", e)
 #    print("finish")
    return res


 def main_mutli(X, Y, k, alpha):
    core = 40
    np.save("y.npy", Y)
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
    distances, indices = nbrs.kneighbors(X)
    np.save("dis.npy", distances)
    result = []
    pool = multiprocessing.Pool(processes=core)
    N = X.shape[0]
    start_index = 0
    step = N // (core - 1)
    for i in range(0, core):
        end_index  = start_index + step 
        if end_index > N:
            end_index = N
        np.save(str(start_index) + "x.npy", X[start_index:end_index])
        result.append(pool.apply_async(mutli_f, (k, alpha, start_index, end_index, N)))
 #        pool.apply_async(mutli, (k, alpha, start_index, end_index, N))
        start_index += step
    s = 0.0
    pool.close()
    for r in result:
        s += r.get()
    return s / N



 def main_f(X, Y, k, alphas):
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
    tree = faiss.IndexFlatL2(Y.shape[1]) 
    tree.add(Y)
    distances, indices = nbrs.kneighbors(X)
    N = X.shape[0]
    M = Y.shape[0]
    res = 0.0
    #def get_upsilon(x):
    #    return tree.query(x, k)[0][-1]
    #pool = Pool(processes=40)
    #upsilons = pool.map(get_upsilon, X)
    #print(len(upsilons))
    gpu = faiss.StandardGpuResources()
    tree = faiss.index_cpu_to_gpu(gpu, 0, tree)
    tmp_query = np.random.rand(1, Y.shape[1]).astype("float32")
    upsilons = tree.search(X, k)[0]

    reses = [0] * len(alphas)
    bs = []
    for alpha in alphas:
        bs.append(B(k, alpha))
    for i in range(0, N):
        rho = distances[i][-1]
        upsilon = upsilons[i][-1]
        if rho == 0 and upsilon == 0:
            res += 1
            continue
        t = k
        tmp_query[0] = X[i]
        while upsilon < 0.0001 and t <= 900:
            t += 100
            tmp_res = tree.search(tmp_query, t)[0][0]
 #            print(tmp_res, t)
            '''
            for p in tmp_res:
                upsilon = np.linalg.norm(X[i] - Y[p])
                if upsilon != 0:
                    break
        
            if t > 400:
                print(X[i])
                print(Y[tree.search(tmp_query, t)[1][0][-1]])
                print(np.sum(X[i] - Y[tree.search(tmp_query, t)[1][0][-1]]))
            '''
            for y in tmp_res:
                if y > 0.0001:
                    upsilon = y
                    break
        if upsilon < 0.0001:
            res += 1
            continue
            #upsilon = tree.search(tmp_query, t)[0][0][-1]
 #        print(upsilon, flush=True)
        for l in range(0, len(reses)):
            reses[l] += first(M, N, rho, upsilon, alphas[l]) * bs[l]
    for l in range(0, len(reses)):
        reses[l] /= N
    return reses
	import numpy as np
	from sklearn.neighbors import NearestNeighbors
	from scipy import spatial
	import math
	from multiprocessing.pool import Pool
	import multiprocessing

	def main(X, Y, k, alpha):
	nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
	tree = spatial.KDTree(Y)
	distances, indices = nbrs.kneighbors(X)
	N = X.shape[0]
	M = Y.shape[0]
	res = 0.0
	b = B(k, alpha)
	#def get_upsilon(x):
	# return tree.query(x, k)[0][-1]
	#pool = Pool(processes=40)
	#upsilons = pool.map(get_upsilon, X)
	#print(len(upsilons))
	for i in range(0, N):
	rho = distances[i][-1]
	upsilon = tree.query(X[i], k)[0][-1]
	if rho == 0 and upsilon == 0:
	res += 1
	continue
	print(i)
	res += first(M, N, rho, upsilon, alpha) * b
	return res / N

	def first(M, N, rho, upsilon, alpha):
	return math.pow(((N - 1) * rho / (M * upsilon)), 1 / alpha)

	import scipy.special as sc
	def B(k, alpha):
	return gammadiv(sc.gammaln(k), sc.gammaln(k -alpha + 1)) * gammadiv(sc.gammaln(k), sc.gammaln(k + alpha - 1))

	def gammadiv(a, b):
	return math.exp(a - b)

	import gc
	import sys
	import os
	def mutli(k, alpha, start_index, end_index, N):
	try:
	sys.setrecursionlimit(1000000)
	X = np.load(str(start_index) + "x.npy")
	Y = np.load("y.npy")
	distances = np.load("dis.npy")
	# nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
	tree = spatial.KDTree(Y)
	# distances, indices = nbrs.kneighbors(X)
	M = Y.shape[0]
	# N = X.shape[0]
	res = 0.0
	b = B(k, alpha)
	for i in range(0, end_index - start_index):
	# if i % 10 == 0:
	# print(i, end_index - start_index)
	rho = distances[i][-1]
	upsilon = tree.query(X[i], k)[0][-1]
	if rho == 0 and upsilon == 0:
	res += 1
	continue
	t = k
	while upsilon == 0:
	t *= 2
	upsilon = tree.query(X[i], t)[0][-1]
	res += first(M, N, rho, upsilon, alpha) * b
	except:
	print("waht?")
	# print("finish")
	return res

	import faiss
	def mutli_f(k, alpha, start_index, end_index, N):
	try:
	sys.setrecursionlimit(1000000)
	X = np.load(str(start_index) + "x.npy").astype('float32')
	Y = np.load("y.npy").astype('float32')
	distances = np.load("dis.npy").astype('float32')
	tree = faiss.IndexFlatL2(Y.shape[1])
	tree.add(Y)
	M = Y.shape[0]
	res = 0.0
	b = B(k, alpha)
	for i in range(0, end_index - start_index):
	# if i % 10 == 0:
	# print(i, end_index - start_index)
	rho = distances[i][-1]
	upsilon = tree.search(X[i], k)[0][-1]
	if rho == 0 and upsilon == 0:
	res += 1
	continue
	t = k
	while upsilon == 0:
	t *= 2
	upsilon = tree.query(X[i], t)[0][-1]
	res += first(M, N, rho, upsilon, alpha) * b
	except Exception as e:
	print("waht?", e)
	# print("finish")
	return res


	def main_mutli(X, Y, k, alpha):
	core = 40
	np.save("y.npy", Y)
	nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
	distances, indices = nbrs.kneighbors(X)
	np.save("dis.npy", distances)
	result = []
	pool = multiprocessing.Pool(processes=core)
	N = X.shape[0]
	start_index = 0
	step = N // (core - 1)
	for i in range(0, core):
	end_index = start_index + step
	if end_index > N:
	end_index = N
	np.save(str(start_index) + "x.npy", X[start_index:end_index])
	result.append(pool.apply_async(mutli_f, (k, alpha, start_index, end_index, N)))
	# pool.apply_async(mutli, (k, alpha, start_index, end_index, N))
	start_index += step
	s = 0.0
	pool.close()
	for r in result:
	s += r.get()
	return s / N



	def main_f(X, Y, k, alphas):
	nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
	tree = faiss.IndexFlatL2(Y.shape[1])
	tree.add(Y)
	distances, indices = nbrs.kneighbors(X)
	N = X.shape[0]
	M = Y.shape[0]
	res = 0.0
	#def get_upsilon(x):
	# return tree.query(x, k)[0][-1]
	#pool = Pool(processes=40)
	#upsilons = pool.map(get_upsilon, X)
	#print(len(upsilons))
	gpu = faiss.StandardGpuResources()
	tree = faiss.index_cpu_to_gpu(gpu, 0, tree)
	tmp_query = np.random.rand(1, Y.shape[1]).astype("float32")
	upsilons = tree.search(X, k)[0]

	reses = [0] * len(alphas)
	bs = []
	for alpha in alphas:
	bs.append(B(k, alpha))
	for i in range(0, N):
	rho = distances[i][-1]
	upsilon = upsilons[i][-1]
	if rho == 0 and upsilon == 0:
	res += 1
	continue
	t = k
	tmp_query[0] = X[i]
	while upsilon < 0.0001 and t <= 900:
	t += 100
	tmp_res = tree.search(tmp_query, t)[0][0]
	# print(tmp_res, t)
	'''
	for p in tmp_res:
	upsilon = np.linalg.norm(X[i] - Y[p])
	if upsilon != 0:
	break

	if t > 400:
	print(X[i])
	print(Y[tree.search(tmp_query, t)[1][0][-1]])
	print(np.sum(X[i] - Y[tree.search(tmp_query, t)[1][0][-1]]))
	'''
	for y in tmp_res:
	if y > 0.0001:
	upsilon = y
	break
	if upsilon < 0.0001:
	res += 1
	continue
	#upsilon = tree.search(tmp_query, t)[0][0][-1]
	# print(upsilon, flush=True)
	for l in range(0, len(reses)):
	reses[l] += first(M, N, rho, upsilon, alphas[l]) * bs[l]
	for l in range(0, len(reses)):
	reses[l] /= N
	return reses