cayetanobv · June 21, 2022 12:26
diff --git a/fast_euclidean_dist.py b/fast_euclidean_dist.py
 import numpy as np
 import pandas as pd
 import pyproj

 from sklearn.metrics import pairwise

 # Generate random coordinates (Madrid area)
 SAMPLE_SIZE_1 = 100000
 SAMPLE_SIZE_2 = 2000

 coords1 = np.random.normal(
    loc=(-3.7, 40.4),
    scale=(0.05, 0.05),
    size=(SAMPLE_SIZE_1, 2)
 )

 coords2 = np.random.normal(
    loc=(-3.7, 40.4),
    scale=(0.05, 0.05),
    size=(SAMPLE_SIZE_2, 2)
 )

 # Transform CRS to use meters
 transformer = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:25830', always_xy=True)
 new_coords1 = transformer.transform(coords1[:, 0], coords1[:, 1])
 new_coords2 = transformer.transform(coords2[:, 0], coords2[:, 1])


 # Compute distances very fast using euclidean sklearn formulation
 res = pairwise.euclidean_distances(np.array(new_coords1).T, np.array(new_coords2).T)

 # Get distances <= 100 meters
 res_100 = res <= 100
 res_100_check = np.any(res_100, axis=1)

 # Uncomment above lines to export to CSV and Debug in QGIS
 # df1 = pd.DataFrame(np.array(new_coords1).T, columns=['x', 'y'])
 # df1['res'] = res_100_check
 # df1.to_csv('/tmp/x1.csv', index=False)
 # df2 = pd.DataFrame(np.array(new_coords2).T, columns=['x', 'y'])
 # df2.to_csv('/tmp/x2.csv', index=False)
	import numpy as np
	import pandas as pd
	import pyproj

	from sklearn.metrics import pairwise

	# Generate random coordinates (Madrid area)
	SAMPLE_SIZE_1 = 100000
	SAMPLE_SIZE_2 = 2000

	coords1 = np.random.normal(
	loc=(-3.7, 40.4),
	scale=(0.05, 0.05),
	size=(SAMPLE_SIZE_1, 2)
	)

	coords2 = np.random.normal(
	loc=(-3.7, 40.4),
	scale=(0.05, 0.05),
	size=(SAMPLE_SIZE_2, 2)
	)

	# Transform CRS to use meters
	transformer = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:25830', always_xy=True)
	new_coords1 = transformer.transform(coords1[:, 0], coords1[:, 1])
	new_coords2 = transformer.transform(coords2[:, 0], coords2[:, 1])


	# Compute distances very fast using euclidean sklearn formulation
	res = pairwise.euclidean_distances(np.array(new_coords1).T, np.array(new_coords2).T)

	# Get distances <= 100 meters
	res_100 = res <= 100
	res_100_check = np.any(res_100, axis=1)

	# Uncomment above lines to export to CSV and Debug in QGIS
	# df1 = pd.DataFrame(np.array(new_coords1).T, columns=['x', 'y'])
	# df1['res'] = res_100_check
	# df1.to_csv('/tmp/x1.csv', index=False)
	# df2 = pd.DataFrame(np.array(new_coords2).T, columns=['x', 'y'])
	# df2.to_csv('/tmp/x2.csv', index=False)