Last active
June 21, 2022 12:26
-
-
Save cayetanobv/b066bd5c7565786dc8f1cea8dbe42c9b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import pyproj | |
from sklearn.metrics import pairwise | |
# Generate random coordinates (Madrid area) | |
SAMPLE_SIZE_1 = 100000 | |
SAMPLE_SIZE_2 = 2000 | |
coords1 = np.random.normal( | |
loc=(-3.7, 40.4), | |
scale=(0.05, 0.05), | |
size=(SAMPLE_SIZE_1, 2) | |
) | |
coords2 = np.random.normal( | |
loc=(-3.7, 40.4), | |
scale=(0.05, 0.05), | |
size=(SAMPLE_SIZE_2, 2) | |
) | |
# Transform CRS to use meters | |
transformer = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:25830', always_xy=True) | |
new_coords1 = transformer.transform(coords1[:, 0], coords1[:, 1]) | |
new_coords2 = transformer.transform(coords2[:, 0], coords2[:, 1]) | |
# Compute distances very fast using euclidean sklearn formulation | |
res = pairwise.euclidean_distances(np.array(new_coords1).T, np.array(new_coords2).T) | |
# Get distances <= 100 meters | |
res_100 = res <= 100 | |
res_100_check = np.any(res_100, axis=1) | |
# Uncomment above lines to export to CSV and Debug in QGIS | |
# df1 = pd.DataFrame(np.array(new_coords1).T, columns=['x', 'y']) | |
# df1['res'] = res_100_check | |
# df1.to_csv('/tmp/x1.csv', index=False) | |
# df2 = pd.DataFrame(np.array(new_coords2).T, columns=['x', 'y']) | |
# df2.to_csv('/tmp/x2.csv', index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment