Last active
September 23, 2021 16:25
-
-
Save tastatham/152b30fe87e8f78a9fc1889189e52aab to your computer and use it in GitHub Desktop.
spatial_shuffle.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def spatial_shuffle(
    ddf, by="hilbert", column=None, npartitions=20, p=10, drop=True, **kwargs
):
    """
    Spatially shuffle a Dask-GeoPandas collection by a space-filling-curve
    method or by a user-defined column.

    Parameters
    ----------
    ddf : Dask-GeoPandas collection
        Object to shuffle. It must provide ``hilbert_distance``,
        ``morton_distance`` and ``geohash`` (only the one selected by ``by``
        is called), plus ``shuffle``, ``set_geometry`` and
        ``calculate_spatial_partitions``.
    by : str, default "hilbert"
        Partitioning method used when ``column`` is None; one of
        ``"hilbert"``, ``"morton"`` or ``"geohash"``. The computed values are
        stored in a helper column named after the method.
    column : str, optional
        Name of an existing column to shuffle on. When given, ``by`` is
        ignored and no helper column is computed.
    npartitions : int, default 20
        Number of partitions requested from the shuffle.
    p : int, default 10
        Value passed positionally to the selected distance/geohash method.
        NOTE(review): its exact meaning depends on the installed
        dask-geopandas version - confirm against that method's signature.
    drop : bool, default True
        Whether to drop the helper column created by this function after
        shuffling. A user-supplied ``column`` is never dropped.
    **kwargs
        Extra keyword arguments forwarded to ``ddf.shuffle``.

    Returns
    -------
    Dask-GeoPandas collection
        The shuffled collection, after ``calculate_spatial_partitions()`` has
        been called on it.

    Raises
    ------
    ValueError
        If ``column`` is None and ``by`` is not a supported method.
    """
    if column is None:
        # Validate up front so an unsupported method fails before any work
        # is scheduled on ``ddf``.
        if by not in ("hilbert", "morton", "geohash"):
            raise ValueError(
                "Spatial partitioning only supports 'hilbert', 'morton' and 'geohash' methods"
            )
        if by == "hilbert":
            distances = ddf.hilbert_distance(p)
        elif by == "morton":
            distances = ddf.morton_distance(p)
        else:
            distances = ddf.geohash(p)
        # ``assign`` returns a new collection, so the caller's object does
        # not silently gain the helper column.
        ddf = ddf.assign(**{by: distances})
        shuffle_on = by
        created_helper = True
    else:
        # Shuffle on the user's column; there is nothing to compute.
        shuffle_on = column
        created_helper = False

    ddf = ddf.shuffle(on=shuffle_on, npartitions=npartitions, **kwargs)

    if drop and created_helper:
        ddf = ddf.drop(shuffle_on, axis=1)

    # NOTE(review): the geometry column name is hard-coded as in the original
    # implementation; inputs whose geometry column is named differently are
    # not supported here.
    ddf = ddf.set_geometry(col="geometry")

    # Calculate convex hull of each partition
    ddf.calculate_spatial_partitions()
    return ddf
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment