Created
March 20, 2017 08:39
-
-
Save n-bar/0ac7a64d493b799db811781be023fa50 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"TestResults(failed=0, attempted=2)\n" | |
] | |
} | |
], | |
"source": [ | |
"import math\n", | |
"import doctest\n", | |
"\n", | |
"R = 6371.0 # radius of the earth in km\n", | |
"dlat = 6371.0 / 180 * math.pi # ≈ 111.19 km\n", | |
"\n", | |
"deg_rd = 2 * math.pi / 360. # radians\n", | |
"\n", | |
"def euclid_dist(lat1, lon1, lat2, lon2):\n", | |
" \"\"\"\n", | |
" >>> euclid_dist(1, 0, -1, 0) == 2 * dlat\n", | |
" True\n", | |
" >>> euclid_dist(1, 1, 1, 1) == 0\n", | |
" True\n", | |
" \"\"\"\n", | |
" \n", | |
" coef = math.cos( (lat2 + lat1) * .5 * deg_rd) # degree -> radians; (lat2 + lat1) * .5 / 180.0\n", | |
" x = (lon2 - lon1) * coef\n", | |
" y = lat2 - lat1\n", | |
" d = (x*x + y*y)**.5 * dlat\n", | |
" \n", | |
" return d\n", | |
"\n", | |
"if __name__ == \"__main__\":\n", | |
" print(doctest.testmod())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(50.194, -85.499, -80.99, -49.822)\n" | |
] | |
} | |
], | |
"source": [ | |
"from random import randint, seed\n", | |
"\n", | |
"\n", | |
"seed(42)\n", | |
"\n", | |
"rnd_lat = lambda lat=180: randint(-lat*1e3, lat*1e3) / 1e3\n", | |
"rnd_lon = lambda lon=90: randint(-lon*1e3, lon*1e3) / 1e3\n", | |
"\n", | |
"p = rnd_lat(), rnd_lon(), rnd_lat(), rnd_lon()\n", | |
"print(p)\n", | |
"lat1, lon1, lat2, lon2 = p" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext line_profiler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000000 loops, best of 3: 1.25 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000000 -r 3 euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%lprun -f euclid_dist euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Python" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def euclid_dist(lat1, lon1, lat2, lon2): \n", | |
" x = (lon2 - lon1) * math.cos((lat1 + lat2) * .5 * deg_rd)\n", | |
" y = lat2 - lat1\n", | |
" return (x*x + y*y)**.5 * dlat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000000 loops, best of 3: 1.16 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000000 -r 3 euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%lprun -f euclid_dist euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Numba" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import numba\n", | |
"\n", | |
"\n", | |
"@numba.jit([\"float64(float64, float64, float64, float64)\"], nopython=True, target='cpu', cache=True)\n", | |
"def nu_euclid_dist(lat1, lon1, lat2, lon2): \n", | |
" x = (lon2 - lon1) * math.cos((lat1 + lat2) * .5 * deg_rd)\n", | |
" y = lat2 - lat1\n", | |
" return (x*x + y*y)**.5 * dlat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000000 loops, best of 3: 456 ns per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000000 -r 3 nu_euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cython" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext cython" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%%cython\n", | |
"cimport cython\n", | |
"from libc.math cimport cos, pi, sqrt\n", | |
"\n", | |
"\n", | |
"cdef double R = 6371.0 # radius of the earth in km\n", | |
"cdef double dlat = R / 180.0 * pi # ≈ 111.19 km\n", | |
"cdef double deg_rd = 2 * pi / 360.\n", | |
"\n", | |
"def cy_euclid_dist(double lat1, double lon1, double lat2, double lon2):\n", | |
" cdef double lat_avg_rad = (lat2 + lat1) *.5 * deg_rd\n", | |
" cdef double coeff = cos(lat_avg_rad)\n", | |
" cdef double x = (lon2 - lon1) * coeff\n", | |
" cdef double y = lat2 - lat1\n", | |
"\n", | |
" return sqrt(x*x + y*y) * dlat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1000000 loops, best of 3: 329 ns per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000000 -r 3 cy_euclid_dist(lat1, lon1, lat2, lon2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Collections" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[(-0.923, 0.393), (-0.712, -0.075)]" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"seed(43)\n", | |
"target_points = [(rnd_lat(1), rnd_lon(1)) for _ in range(10)]\n", | |
"target_points[:2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[(-0.024, -0.323), (-0.837, -0.856)]" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"seed(45)\n", | |
"rows = int(1e5)\n", | |
"checked_points = [[(rnd_lat(1), rnd_lon(1)) for _ in range(randint(1, 5))] for _ in range(rows)]\n", | |
"checked_points = [zip(*p) for p in checked_points]\n", | |
"checked_points[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Задача - подсчитать количество строк где есть хоть одна координата на допустимом растоянии от проверямой коллекции точек." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"class HasGoodPoint:\n", | |
" def __init__(self, lats, lons):\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" self.lats = lats\n", | |
" self.lons = lons\n", | |
" self.lat_max = max(self.lats) + .5\n", | |
" self.lon_max = max(self.lons) + .5\n", | |
" self.lat_min = min(self.lats) - .5\n", | |
" self.lon_min = min(self.lons) - .5\n", | |
" self.idx_1 = range(len(self.lats))\n", | |
" \n", | |
" def __call__(self, lats, lons, max_dist_km):\n", | |
" assert 0 < max_dist_km <= 10, u\"max_dist_km вне допустимого диапазона: (0, 10]\"\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" idx_2 = len(lats)\n", | |
" for i in range(idx_2):\n", | |
" lat_2, lon_2 = lats[i], lons[i]\n", | |
" if not self.lat_min <= lat_2 <= self.lat_max:\n", | |
" continue\n", | |
" if not self.lon_min <= lon_2 <= self.lon_max:\n", | |
" continue\n", | |
" for x in self.idx_1:\n", | |
" if euclid_dist(self.lats[x], self.lons[x], lat_2, lon_2) <= max_dist_km:\n", | |
" return True\n", | |
" return False\n", | |
"\n", | |
"lats, lons = zip(*target_points)\n", | |
"has_point = HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: 4.83 s per loop\n", | |
"rows/sec: 20047\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"def HasGoodPoint(lats, lons):\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" lat_max = max(lats) + .5\n", | |
" lon_max = max(lons) + .5\n", | |
" lat_min = min(lats) - .5\n", | |
" lon_min = min(lons) - .5\n", | |
" idx_1 = range(len(lats))\n", | |
" \n", | |
" def has(lats_2, lons_2, max_dist_km):\n", | |
"\n", | |
" assert 0 < max_dist_km <= 10, u\"max_dist_km вне допустимого диапазона: (0, 10]\"\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" idx_2 = len(lats_2)\n", | |
"\n", | |
" for i in range(idx_2):\n", | |
" lat_2, lon_2 = lats_2[i], lons_2[i]\n", | |
" if not lat_min <= lat_2 <= lat_max:\n", | |
" continue\n", | |
" if not lon_min <= lon_2 <= lon_max:\n", | |
" continue\n", | |
" for x in idx_1:\n", | |
" if euclid_dist(lats[x], lons[x], lat_2, lon_2) <= max_dist_km:\n", | |
" return True\n", | |
" return False\n", | |
" \n", | |
" return has\n", | |
"\n", | |
"has_point = HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: -5.83e+09 ns per loop\n", | |
"rows/sec: 80413\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Numba" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"def HasGoodPoint(lats, lons):\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" lat_max = max(lats) + .5\n", | |
" lon_max = max(lons) + .5\n", | |
" lat_min = min(lats) - .5\n", | |
" lon_min = min(lons) - .5\n", | |
" idx_1 = range(len(lats))\n", | |
" \n", | |
" def has(lats_2, lons_2, max_dist_km):\n", | |
"\n", | |
" assert 0 < max_dist_km <= 10, u\"max_dist_km вне допустимого диапазона: (0, 10]\"\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" idx_2 = len(lats_2)\n", | |
" for i in range(idx_2):\n", | |
" lat_2, lon_2 = lats_2[i], lons_2[i]\n", | |
" if not lat_min <= lat_2 <= lat_max:\n", | |
" continue\n", | |
" if not lon_min <= lon_2 <= lon_max:\n", | |
" continue\n", | |
" for x in idx_1:\n", | |
" if nu_euclid_dist(lats[x], lons[x], lat_2, lon_2) <= max_dist_km:\n", | |
" return True\n", | |
" return False\n", | |
" \n", | |
" return has\n", | |
"\n", | |
"has_point = HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: 2.13 s per loop\n", | |
"rows/sec: 46377\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cython 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"def HasGoodPoint(lats, lons):\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" lat_max = max(lats) + .5\n", | |
" lon_max = max(lons) + .5\n", | |
" lat_min = min(lats) - .5\n", | |
" lon_min = min(lons) - .5\n", | |
" idx = range(len(lats))\n", | |
" \n", | |
" def has(lats_2, lons_2, max_dist_km):\n", | |
"\n", | |
" assert 0 < max_dist_km <= 10, u\"max_dist_km вне допустимого диапазона: (0, 10]\"\n", | |
" assert len(lats) == len(lons), u\"Длины списков должны совпадать\"\n", | |
" n = len(lats_2)\n", | |
" for i in range(n):\n", | |
" lat_2, lon_2 = lats_2[i], lons_2[i]\n", | |
" if not lat_min <= lat_2 <= lat_max:\n", | |
" continue\n", | |
" if not lon_min <= lon_2 <= lon_max:\n", | |
" continue\n", | |
" for x in idx:\n", | |
" if cy_euclid_dist(lats[x], lons[x], lat_2, lon_2) <= max_dist_km:\n", | |
" return True\n", | |
" return False\n", | |
" \n", | |
" return has\n", | |
"\n", | |
"has_point = HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: 1.79 s per loop\n", | |
"rows/sec: 55190\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cython 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%%cython\n", | |
"from libc.math cimport cos, pi, sqrt\n", | |
"from cpython cimport bool\n", | |
"\n", | |
"\n", | |
"cdef double R = 6371.0 # radius of the earth in km\n", | |
"cdef double dlat = R / 180.0 * pi # ≈ 111.19 km\n", | |
"cdef double deg_rd = 2 * pi / 360.\n", | |
"\n", | |
"\n", | |
"cdef double cyx_euclid_dist(double lat_1, double lon_1, double lat_2, double lon_2):\n", | |
" cdef double lat_avg_rad = (lat_2 + lat_1) *.5 * deg_rd\n", | |
" cdef double coeff = cos(lat_avg_rad)\n", | |
" cdef double x = (lon_2 - lon_1) * coeff\n", | |
" cdef double y = lat_2 - lat_1\n", | |
"\n", | |
" return sqrt(x*x + y*y) * dlat\n", | |
"\n", | |
"\n", | |
"cdef bool cyx_is_near(double lat_1, double lon_1, double lat_2, double lon_2, float max_dist_km):\n", | |
" return cyx_euclid_dist(lat_1, lon_1, lat_2, lon_2) <= max_dist_km\n", | |
" \n", | |
"\n", | |
"def HasGoodPoint(tuple lats_1, tuple lons_1):\n", | |
" cdef double lat_max, lon_max, lat_min, lon_min\n", | |
" lat_max = max(lats_1) + .5\n", | |
" lon_max = max(lons_1) + .5\n", | |
" lat_min = min(lats_1) - .5\n", | |
" lon_min = min(lons_1) - .5\n", | |
" \n", | |
" def has(tuple lats_2, tuple lons_2, double max_dist_km):\n", | |
" cdef double lat_2, lon_2\n", | |
" for i in xrange(len(lats_2)):\n", | |
" lat_2, lon_2 = lats_2[i], lons_2[i]\n", | |
" if not lat_min <= lat_2 <= lat_max:\n", | |
" continue\n", | |
" if not lon_min <= lon_2 <= lon_max:\n", | |
" continue\n", | |
" for x in xrange(len(lats_1)):\n", | |
" if cyx_is_near(lats_1[x], lons_1[x], lat_2, lon_2, max_dist_km):\n", | |
" return True\n", | |
" return False\n", | |
" return has" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"has_point = HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: 114 ms per loop\n", | |
"rows/sec: 874448\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cython 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%%cython\n", | |
"from libc.math cimport cos, pi, sqrt\n", | |
"from cpython cimport bool\n", | |
"\n", | |
"\n", | |
"cdef double R = 6371.0 # radius of the earth in km\n", | |
"cdef double dlat = R / 180.0 * pi # ≈ 111.19 km\n", | |
"cdef double deg_rd = 2 * pi / 360.\n", | |
"\n", | |
"\n", | |
"cdef double cyx_euclid_dist(double lat_1, double lon_1, double lat_2, double lon_2):\n", | |
" cdef double lat_avg_rad = (lat_2 + lat_1) *.5 * deg_rd\n", | |
" cdef double coeff = cos(lat_avg_rad)\n", | |
" cdef double x = (lon_2 - lon_1) * coeff\n", | |
" cdef double y = lat_2 - lat_1\n", | |
"\n", | |
" return sqrt(x*x + y*y) * dlat\n", | |
"\n", | |
"\n", | |
"cdef bool cyx_is_near(double lat_1, double lon_1, double lat_2, double lon_2, float max_dist_km):\n", | |
" return cyx_euclid_dist(lat_1, lon_1, lat_2, lon_2) <= max_dist_km\n", | |
"\n", | |
" \n", | |
"cdef class cyx_HasGoodPoint:\n", | |
" cdef tuple lats_1, lons_1\n", | |
" cdef double lat_max, lon_max, lat_min, lon_min\n", | |
" cdef int idx_1\n", | |
" \n", | |
" def __init__(self, lats, lons):\n", | |
" \n", | |
" self.lats_1 = lats\n", | |
" self.lons_1 = lons\n", | |
" self.lat_max = max(self.lats_1) + .5\n", | |
" self.lon_max = max(self.lons_1) + .5\n", | |
" self.lat_min = min(self.lats_1) - .5\n", | |
" self.lon_min = min(self.lons_1) - .5\n", | |
" self.idx_1 = len(self.lons_1)\n", | |
"\n", | |
" \n", | |
" def __call__(self, tuple lats_2, tuple lons_2, double max_dist_km):\n", | |
" \n", | |
" cdef double lat_2, lon_2, lat_2_max, lon_2_max, lat_2_min, lon_2_min \n", | |
" \n", | |
" for i in xrange(len(lats_2)):\n", | |
" lat_2, lon_2 = lats_2[i], lons_2[i]\n", | |
" if not self.lat_min <= lat_2 <= self.lat_max:\n", | |
" continue\n", | |
" if not self.lon_min <= lon_2 <= self.lon_max:\n", | |
" continue\n", | |
" for x in xrange(self.idx_1):\n", | |
" if cyx_is_near(self.lats_1[x], self.lons_1[x], lat_2, lon_2, max_dist_km):\n", | |
" return True\n", | |
" return False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.62%\n" | |
] | |
} | |
], | |
"source": [ | |
"has_point = cyx_HasGoodPoint(lats, lons)\n", | |
"print(\"%.2f%%\" % (sum(has_point(*p, max_dist_km=3) for p in checked_points) / float(rows) * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 loops, best of 3: 115 ms per loop\n", | |
"rows/sec: 856432\n" | |
] | |
} | |
], | |
"source": [ | |
"t = %timeit -on 2 -r 3 sum(has_point(*p, max_dist_km=3) for p in checked_points)\n", | |
"print(\"rows/sec: %.f\" % ((t.repeat * t.loops * rows) / sum(t.all_runs)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment