Last active
August 29, 2015 14:14
-
-
Save richarddunks/6eb8945f71e75511b099 to your computer and use it in GitHub Desktop.
Self Join to find nearest neighbor for bicycle racks in NYC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Self join in SQL to find minimum distance between NYCDOT bicycle racks | |
--using this data on CartoDB: https://richard-datapolitan.cartodb.com/tables/dot_bicycle_racks | |
--This is based on a problem presented by Joss Philippe at the MaptimeNYC Jan 28,2015 meeting. | |
--http://www.meetup.com/Maptime-NYC/events/219945042/ | |
--This will calculate a self join of each data point to every other and calculate the distance | |
--Since the data is in WGS84, the distance measure is in radians. We'll calculate meters later | |
WITH query AS ( | |
SELECT d1.cartodb_id AS cid1, d2.cartodb_id AS cid2, ST_DISTANCE(d1.the_geom,d2.the_geom) AS dist | |
FROM dot_bicycle_racks d1, dot_bicycle_racks d2 --the self join happens here | |
WHERE d1.cartodb_id != d2.cartodb_id --to not count the self join since that will be a 0 distance | |
), min_calc AS ( | |
SELECT cid1, cid2, dist | |
FROM ( | |
--this uses a window function to find the minimum distance for each row, identified by cartodb_id | |
SELECT query.*, MIN(dist) OVER (PARTITION BY cid1) AS mindist | |
FROM query | |
) AS alias | |
WHERE dist = mindist --only return the instance with the minimum distance | |
) | |
--update the nn_name field with the cartodb_id of the nearest bicycle rack | |
UPDATE dot_bicycle_racks d SET nn_name = mc.cid2 FROM min_calc mc WHERE mc.cid1 = d.cartodb_id; | |
--Now that we have the cartodb_id of the nearest bicycle rack, we want to populate the name and geometry | |
--for the closest rack. This is done with another self join but instead of a cross join, we're just | |
--joining on the nearest neighbor | |
SELECT | |
d1.cartodb_id, | |
d1.name, | |
d1.small, | |
d1.large, | |
d1.circular, | |
d1.mini_hoop, | |
d1.total_rack, | |
d1.geom, | |
d2.name AS nn_name, | |
d2.geom AS nn_geom, | |
--calculating distance with SRID 2163 (US National Atlas Equal area) gives a result in meters | |
ST_DISTANCE(ST_TRANSFORM(d1.geom,2163),ST_TRANSFORM(d2.geom,2163)) AS distance_meters | |
FROM dot_bicycle_racks d1 | |
INNER JOIN dot_bicycle_racks d2 | |
ON d1.nn_name::numeric = d2.cartodb_id | |
--the cartodb_id got cast into varchar at some point. | |
--The cast into numeric is relatively cheap computationally in this case | |
--and saves us having to transform the geometry in the initial cross join | |
ORDER BY d1.cartodb_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment