Created
October 28, 2019 09:18
-
-
Save jorisvandenbossche/de8bfd1c1a7f62e25fd6f29bd9c3c0e3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Getting a flat array of coordinates from array of Geometry objects" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import geopandas\n", | |
| "import pygeos" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Quick copy of the relevant part of https://github.com/pyviz/datashader/pull/819\n", | |
| "\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "def from_geopandas(ga):\n", | |
| " line_parts = [\n", | |
| " _shapely_to_array_parts(shape) for shape in ga\n", | |
| " ]\n", | |
| " line_lengths = [\n", | |
| " sum([len(part) for part in parts])\n", | |
| " for parts in line_parts\n", | |
| " ]\n", | |
| " flat_array = np.concatenate(\n", | |
| " [part for parts in line_parts for part in parts]\n", | |
| " )\n", | |
| " start_indices = np.concatenate(\n", | |
| " [[0], line_lengths[:-1]]\n", | |
| " ).astype('uint').cumsum()\n", | |
| " return start_indices, flat_array\n", | |
| " \n", | |
| " \n", | |
| "def _polygon_to_array_parts(polygon):\n", | |
| " import shapely.geometry as sg\n", | |
| " shape = sg.polygon.orient(polygon)\n", | |
| " exterior = np.asarray(shape.exterior.ctypes)\n", | |
| " polygon_parts = [exterior]\n", | |
| " hole_separator = np.array([-np.inf, -np.inf])\n", | |
| " for ring in shape.interiors:\n", | |
| " interior = np.asarray(ring.ctypes)\n", | |
| " polygon_parts.append(hole_separator)\n", | |
| " polygon_parts.append(interior)\n", | |
| " return polygon_parts\n", | |
| "\n", | |
| "def _shapely_to_array_parts(shape):\n", | |
| " import shapely.geometry as sg\n", | |
| " if isinstance(shape, sg.Polygon):\n", | |
| " # Single polygon\n", | |
| " return _polygon_to_array_parts(shape)\n", | |
| " elif isinstance(shape, sg.MultiPolygon):\n", | |
| " shape = list(shape)\n", | |
| " polygon_parts = _polygon_to_array_parts(shape[0])\n", | |
| " polygon_separator = np.array([np.inf, np.inf])\n", | |
| " for polygon in shape[1:]:\n", | |
| " polygon_parts.append(polygon_separator)\n", | |
| " polygon_parts.extend(_polygon_to_array_parts(polygon))\n", | |
| " return polygon_parts\n", | |
| " else:\n", | |
| " raise ValueError(\"\"\"\n", | |
| "Received invalid value of type {typ}. Must be an instance of\n", | |
| "shapely.geometry.Polygon or shapely.geometry.MultiPolygon\"\"\"\n", | |
| " .format(typ=type(shape).__name__))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Testing on the \"countries\" data:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Datashader's method:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(array([ 0, 48, 152, 208, 1854, 2766, 2990, 3098, 3266,\n", | |
| " 3790, 4034, 4264, 4512, 4586, 4660, 4822, 4938, 4980,\n", | |
| " 5032, 6308, 6354, 6374, 6556, 6820, 6838, 6860, 7050,\n", | |
| " 7074, 7414, 7456, 7862, 7982, 8134, 8334, 8438, 8510,\n", | |
| " 8614, 8728, 8768, 8838, 8878, 9062, 9142, 9194, 9346,\n", | |
| " 9412, 9430, 9452, 9536, 9610, 9690, 9778, 9866, 10018,\n", | |
| " 10096, 10146, 10262, 10378, 10500, 10538, 10588, 10680, 10820,\n", | |
| " 10858, 10912, 10956, 11034, 11158, 11256, 11318, 11332, 11454,\n", | |
| " 11510, 11668, 11690, 11842, 11868, 11920, 11942, 12040, 12058,\n", | |
| " 12090, 12152, 12276, 12314, 12358, 12376, 12394, 12454, 12550,\n", | |
| " 12576, 12610, 12738, 12812, 12952, 13040, 13138, 13176, 13326,\n", | |
| " 13598, 13670, 13696, 13742, 13874, 14012, 14094, 14164, 14272,\n", | |
| " 14424, 14478, 14518, 14598, 14688, 14874, 14964, 15038, 15102,\n", | |
| " 15156, 15244, 15282, 15326, 15364, 15480, 15536, 15646, 15784,\n", | |
| " 15832, 15918, 15966, 15980, 16014, 16044, 16110, 16212, 16238,\n", | |
| " 16264, 16346, 16480, 16964, 16984, 17466, 17484, 17662, 17712,\n", | |
| " 17826, 17866, 17956, 18004, 18236, 18362, 18378, 18414, 18494,\n", | |
| " 18560, 18630, 18686, 18820, 18886, 18974, 19126, 20462, 20494,\n", | |
| " 20524, 20650, 20738, 20850, 20968, 20998, 21050, 21106, 21134,\n", | |
| " 21180, 21216, 21312, 21348, 21390, 21406], dtype=uint64),\n", | |
| " array([180. , -16.06713266, 179.41350936, ..., 4.17369904,\n", | |
| " 30.83385242, 3.5091716 ]))" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from_geopandas(df.geometry.array)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "46.2 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit from_geopandas(df.geometry.array)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Using pygeos:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "arr = pygeos.from_wkb(geopandas.array.to_wkb(df.geometry.array))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[180. , -16.06713266],\n", | |
| " [180. , -16.55521657],\n", | |
| " [179.36414266, -16.80135408],\n", | |
| " ...,\n", | |
| " [ 31.88145 , 3.55827 ],\n", | |
| " [ 31.24556 , 3.7819 ],\n", | |
| " [ 30.83385242, 3.5091716 ]])" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pygeos.get_coordinates(arr)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "154 µs ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit pygeos.get_coordinates(arr)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Note: this currently gives a 2D array (not x and y intertwined) and it does not give the start indices, but I think having a version that does those things should not be a order of magnitude slower." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python (dev)", | |
| "language": "python", | |
| "name": "dev" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment