Skip to content

Instantly share code, notes, and snippets.

@cayetanobv
Last active June 21, 2022 12:26
Show Gist options
  • Save cayetanobv/b066bd5c7565786dc8f1cea8dbe42c9b to your computer and use it in GitHub Desktop.
Save cayetanobv/b066bd5c7565786dc8f1cea8dbe42c9b to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import pyproj
from sklearn.metrics import pairwise
# Generate random coordinates (Madrid area)
SAMPLE_SIZE_1 = 100000
SAMPLE_SIZE_2 = 2000
coords1 = np.random.normal(
loc=(-3.7, 40.4),
scale=(0.05, 0.05),
size=(SAMPLE_SIZE_1, 2)
)
coords2 = np.random.normal(
loc=(-3.7, 40.4),
scale=(0.05, 0.05),
size=(SAMPLE_SIZE_2, 2)
)
# Transform CRS to use meters
transformer = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:25830', always_xy=True)
new_coords1 = transformer.transform(coords1[:, 0], coords1[:, 1])
new_coords2 = transformer.transform(coords2[:, 0], coords2[:, 1])
# Compute distances very fast using euclidean sklearn formulation
res = pairwise.euclidean_distances(np.array(new_coords1).T, np.array(new_coords2).T)
# Get distances <= 100 meters
res_100 = res <= 100
res_100_check = np.any(res_100, axis=1)
# Uncomment above lines to export to CSV and Debug in QGIS
# df1 = pd.DataFrame(np.array(new_coords1).T, columns=['x', 'y'])
# df1['res'] = res_100_check
# df1.to_csv('/tmp/x1.csv', index=False)
# df2 = pd.DataFrame(np.array(new_coords2).T, columns=['x', 'y'])
# df2.to_csv('/tmp/x2.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment