Last active
August 29, 2015 14:19
-
-
Save daijiang/5e93ea3f1eec43445bd6 to your computer and use it in GitHub Desktop.
Distance calculation based on latitude and longitude
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Here are codes to calculate distance of points based on their lat and long coordinations. | |
# The Python version is faster than R version. | |
# Data are presented within R code. For 9 points, in my old laptop, | |
# R took 0.011 seconds while Python used 0.00595 seconds. | |
# For a dataset with 1,000 million points, Python version finished in 20 mins, | |
# but R needs ~62 hours! | |
## R version. ============================================================ | |
library(dplyr) | |
library(sp) | |
pp = read.csv("~/Documents/points.csv") | |
# pts pickup_longitude pickup_latitude dropoff_longitude dropoff_latitude | |
# 1 -73.97349 40.77980 -73.97342 40.75856 | |
# 2 -73.97534 40.75577 -73.95689 40.77539 | |
# 3 -73.95567 40.77672 -73.98109 40.78081 | |
# 4 -73.98205 40.77904 -73.95519 40.77434 | |
# 5 -73.97197 40.76250 -73.97358 40.79469 | |
# 6 -73.98004 40.78619 -73.97630 40.79461 | |
# 7 -73.99150 40.76628 -73.98544 40.76862 | |
# 8 -73.98467 40.76813 -73.99794 40.74097 | |
# 9 -73.99796 40.74096 -73.99930 40.74418 | |
get_dist = function(x) data.frame(actual_dist = spDistsN1( | |
matrix(c(x$pickup_longitude, x$pickup_latitude), ncol = 2), | |
c(x$dropoff_longitude, x$dropoff_latitude), longlat = TRUE) * 0.621) | |
# in miles | |
system.time( | |
pp$act_dist <- | |
pp %>% | |
rowwise() %>% | |
do(get_dist(.))) | |
# user system elapsed | |
# 0.011 0.000 0.011 | |
pp | |
# pts pickup_longitude pickup_latitude dropoff_longitude dropoff_latitude actual_dist | |
# 1 -73.97349 40.77980 -73.97342 40.75856 1.4628708 | |
# 2 -73.97534 40.75577 -73.95689 40.77539 1.6619633 | |
# 3 -73.95567 40.77672 -73.98109 40.78081 1.3621336 | |
# 4 -73.98205 40.77904 -73.95519 40.77434 1.4445393 | |
# 5 -73.97197 40.76250 -73.97358 40.79469 2.2189531 | |
# 6 -73.98004 40.78619 -73.97630 40.79461 0.6119867 | |
# 7 -73.99150 40.76628 -73.98544 40.76862 0.3563861 | |
# 8 -73.98467 40.76813 -73.99794 40.74097 1.9960550 | |
# 9 -73.99796 40.74096 -73.99930 40.74418 0.2327583 | |
## Python version ============================================================= | |
import geopy | |
from geopy.distance import vincenty | |
## function to get distance in miles for one point | |
def get_dist_mile(x): | |
p1 = (x[2], x[1]) | |
p2 = (x[4], x[3]) | |
return vincenty(p1, p2).miles | |
# read data | |
import pandas as pd | |
pp = pd.read_csv("/home/dli/Documents/points.csv", sep = ",") | |
# calculate distance for all points. | |
import timeit | |
timeit.Timer("pp.loc[:,'adist'] = pp.apply(get_dist_mile, axis = 1)","from __main__ import pp, get_dist_mile").timeit(1) | |
# 0.0059549808502197266 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Try