Created
March 13, 2017 21:06
-
-
Save rmania/8c88377a5c902dfbc134795a7af538d8 to your computer and use it in GitHub Desktop.
flatten geometry series (3D to 2D) in geopandas dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Often when reading in a ShapeFile from Basemap, you'll get: "ValueError: readshapefile can only handle 2D shape types" | |
# A trick can be to convert your geometry in your GeoPandas Dataframe and restoring the new flattened 2D geometry | |
# series back into a shapefile and try again. | |
# edit from http://stackoverflow.com/questions/33417764/basemap-readshapefile-valueerror | |
from shapely.geometry import Polygon, MultiPolygon, shape, Point | |
import geopandas as gp | |
def convert_3D_2D(geometry): | |
''' | |
Takes a GeoSeries of 3D Multi/Polygons (has_z) and returns a list of 2D Multi/Polygons | |
''' | |
new_geo = [] | |
for p in geometry: | |
if p.has_z: | |
if p.geom_type == 'Polygon': | |
lines = [xy[:2] for xy in list(p.exterior.coords)] | |
new_p = Polygon(lines) | |
new_geo.append(new_p) | |
elif p.geom_type == 'MultiPolygon': | |
new_multi_p = [] | |
for ap in p: | |
lines = [xy[:2] for xy in list(ap.exterior.coords)] | |
new_p = Polygon(lines) | |
new_multi_p.append(new_p) | |
new_geo.append(MultiPolygon(new_multi_p)) | |
return new_geo | |
geodf_2d = gp.GeoDataFrame.from_file(shp_file) # plug_in your shapefile | |
geodf_2d.geometry = convert_3D_2D(geodf_2d.geometry) # new geodf with 2D geometry series | |
# geodf_2d.to_file(path + shapefile.shp, driver = 'ESRI Shapefile') will sore a shapefile with 2D shape types |
Thanks above, output_dimensions=2
ftw!
import geopandas as gpd
from shapely import wkb
_drop_z = lambda geom: wkb.loads(wkb.dumps(geom, output_dimension=2))
df.geometry = df.geometry.transform(_drop_z)
%timeit drop_z(df.geometry) # 42.9 s ± 270 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_pygeos(df.geometry) # 568 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_convert_3D_2D(df.geometry) # 43.6 s ± 541 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
detail
- The
convert_3D_2D
above only works for (Multi)Polygons. drop_z_convert_3D_2D
is a rewrite ofconvert_3D_2D
for all shapely geometry objects.drop_z_pygeos
requires pygeos to be installed. at least until shapely 2 is released.drop_z_shapely_for
uses a method suggested: shapely/shapely#709drop_z_shapely_transform
is to compare between the transform and for method for looping between rows/geoms. I did these to explain why I changed the suggested method. >10% improvement.- I ran this on LineStrings of UK rivers with 186,255 rows.
import shapely
import geopandas as gpd
df = gpd.read_file(<Your Data Here>)
def convert_3D_2D(geometry):
''' Drop Z coordiates from GeoSeries, returning list if (Multi)Polygons
Taken from: https://gist.github.com/rmania/8c88377a5c902dfbc134795a7af538d8
Takes a GeoSeries of 3D Multi/Polygons (has_z) and returns a list of 2D Multi/Polygons
'''
new_geo = []
for p in geometry:
if p.has_z:
if p.geom_type == 'Polygon':
lines = [xy[:2] for xy in list(p.exterior.coords)]
new_p = Polygon(lines)
new_geo.append(new_p)
elif p.geom_type == 'MultiPolygon':
new_multi_p = []
for ap in p:
lines = [xy[:2] for xy in list(ap.exterior.coords)]
new_p = Polygon(lines)
new_multi_p.append(new_p)
new_geo.append(MultiPolygon(new_multi_p))
return new_geo
def drop_z_shapelywkb(ds):
''' Drop Z coordinates from GeoSeries, returns GeoSeries
Using this method: https://gist.github.com/rmania/8c88377a5c902dfbc134795a7af538d8?permalink_comment_id=2893099#gistcomment-2893099
'''
_drop_z_wkb = lambda geom: shapely.wkb.loads(shapely.wkb.dumps(geom, output_dimension=2))
return ds.transform(_drop_z_wkb)
def drop_z_pygeos(ds):
''' Drop Z coordinates from GeoSeries, returns GeoSeries
Requires pygeos to be installed, and such I've added `import pygeos` to check.
'''
import pygeos
return gpd.GeoSeries.from_wkb(ds.to_wkb(output_dimension=2))
def drop_z_convert_3D_2D(ds):
''' Drop Z coordinates from GeoSeries, returns GeoSeries
A rewrite of convert_3D_2D but to fit the form of these other drop_z... functions.
'''
_get_coords = lambda g: (
g.coords
if not g.geom_type == 'Polygon' else # isinstance(g, Polygon)?
g.exterior.coords
)
_drop_z = lambda geom: type(geom)(
(_drop_z(g) for g in geom.geoms) # nest for iterables
if hasattr(geom, '__iter__') else
(coord[:2] for coord in _get_coords(geom)) # keep x,y
)
return gpd.GeoSeries(_drop_z(geom) for geom in ds)
_drop_z_shapely = lambda geom: shapely.ops.transform(lambda *args: args[:2], geom)
def drop_z_shapely_for(ds):
''' Drop Z coordinates from GeoSeries, returns GeoSeries
Using this method: https://github.com/shapely/shapely/issues/709#issuecomment-799977173
'''
return gpd.GeoSeries(_drop_z_shapely(geom) for geom in ds)
def drop_z_shapely_transform(ds):
''' Drop Z coordinates from GeoSeries, returns GeoSeries
Same as drop_z_shapely_for but using the pandas transform operation instead.
'''
return ds.transform(lambda geom: _drop_z_shapely(geom))
a = convert_3D_2D(df.geometry) # [] (empty list because I have only LineStrings)
b = drop_z_shapelywkb(df.geometry) # GeoSeries, 2D geoms
c = drop_z_pygeos(df.geometry) # GeoSeries, 2D geoms
d = drop_z_convert_3D_2D(df.geometry) # GeoSeries, 2D geoms
e = drop_z_shapely_for(df.geometry) # GeoSeries, 2D geoms
f = drop_z_shapely_transform(df.geometry) # GeoSeries, 2D geoms
print( all([
gpd.GeoSeries.all(b==c),
gpd.GeoSeries.all(c==d),
gpd.GeoSeries.all(d==e),
gpd.GeoSeries.all(e==f),
gpd.GeoSeries.all(f==b),
]) ) # True
%timeit convert_3D_2D(df.geometry) # 19.7 s ± 430 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_shapelywkb(df.geometry) # 42.9 s ± 270 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_pygeos(df.geometry) # 568 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_convert_3D_2D(df.geometry) # 43.6 s ± 541 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_shapely_for(df.geometry) # 45.6 s ± 310 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit drop_z_shapely_transform(df.geometry) # 37.7 s ± 722 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Thanks! it helped me get 2D Polygons from the KML file from Google Earth.
Thanks so much! This really helped me getting 2D polygons from Google Earth pro!
Is force_2d now the best approach for this?
Thanks! works great.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Really helped me out - thank you :)