-
-
Save bzgeo/950f3db986b3513311ed42efe2395171 to your computer and use it in GitHub Desktop.
Python script to convert from ICESat-2 ATL08 HDF data to shapefile. Usage: 'python icesat2_shp.py <path> --variables [<var1>,<var2>,...,<varN>] --outFormat <extension> --filterBounds [<W>,<S>,<E>,<N>] --verbose'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# based on Kel Markert's GEDI HDF to shapefile conversion script | |
# https://gist.github.com/KMarkert/c68ccf53260d7b775b836bf2e11e2ec3 | |
import os | |
import fire | |
import h5py | |
import glob | |
import tqdm | |
import numpy as np | |
import pandas as pd | |
import geopandas as gpd | |
# requires fire, h5py, tqdm, numpy, pandas, and geopandas | |
def gedi_to_vector(file,variables=None,outFormat='CSV',filterBounds=None): | |
# open hdf5 file | |
data = h5py.File(file,'r') | |
# get full file name and extension | |
name,_ = os.path.splitext(file) | |
# create empty dataframe to append data to | |
df = pd.DataFrame() | |
# loop over all of the hdf5 groups | |
for k in list(data.keys()): | |
# if gt in the group name | |
if 'gt' in k: | |
# get the geolocation subgroup | |
geo = data[k]['land_segments'] | |
d = {} | |
# loop through all of the variables defined earlier | |
for var in variables: | |
# assign variable array to dict key | |
if var in list(data[k]["land_segments"].keys()): | |
d[var] = np.array(geo[var]) | |
elif var in list(data[k]["land_segments"]['canopy'].keys()): | |
d[var] = np.array(data[k]["land_segments"]['canopy'][var]) | |
else: | |
raise ValueError(f"The variable {var} is not in the geolocation or gt group of the file {file}.") | |
d["gt"] = [k] * len(d[var]) | |
# convert dict of varaibles to dataframe | |
tdf = pd.DataFrame(d) | |
# concat to larger dataframe | |
df = pd.concat([df,tdf],axis=0,sort=False) | |
# check if the the filterBounds is provided | |
if filterBounds is not None: | |
w,s,e,n = filterBounds # expand list to individual variables | |
# select features on X axis | |
horizontalMask = (df.longitude >= w) & (df.longitude <= e) | |
# select features on Y axis | |
verticalMask = (df.latitude >= s) & (df.latitude <= n) | |
# combines masks to select features that intersect | |
spatialMask = verticalMask & horizontalMask | |
# grab only masked values within the bounds provided | |
df = df.loc[spatialMask] | |
# check to make sure that the dataframe has values. | |
# if not, then return from function without saving df | |
if df.size == 0: | |
return | |
if outFormat in ['CSV','csv']: | |
# save dataframe of parsed variables to CSV file | |
df.to_csv('{}.{}'.format(name,outFormat.lower()),index=False) | |
else: | |
# check if df has the geoinformation | |
if ('latitude' not in df.columns) or ('longitude' not in df.columns): | |
raise KeyError("Geospatial variables 'latitude' and/or 'longitude' were not found, " | |
"please specify these variables to be extracted when writing to geospatial format") | |
# convert to geodataframe | |
gdf = gpd.GeoDataFrame( | |
df, geometry=gpd.points_from_xy(df.longitude, df.latitude)) | |
# save the geodataframe of variables to file | |
gdf.to_file('{}.{}'.format(name,outFormat.lower())) | |
return | |
def main(path,variables=None,verbose=False,outFormat='CSV',filterBounds=None): | |
# check if the variables to extract have been defined | |
if variables is None: | |
raise ValueError("Please provide variables from the GEDI file to convert") | |
# if variables have been defined, check if provided in correct datetype | |
if type(variables) is not list: | |
raise TypeError("Provided variables is not list, please provide argument as '[<var1>,<var2>,<var3>]'") | |
# check if filterBounds have been provided and in correct datatype | |
if (filterBounds is not None) and (type(filterBounds) is not list): | |
raise TypeError("Provided filterBounds is not list, please provide argument as '[W,S,E,N]'") | |
# check if the output format provided is supported by script | |
availableFormats = ['CSV','SHP','GeoJSON','GPKG','csv','shp','geojson','gpkg'] | |
if outFormat not in availableFormats: | |
raise NotImplementedError('Selected output format is not support please select one of the following: "CSV","SHP","GeoJSON","GPKG"') | |
# check if path provided is a file or folder | |
if os.path.isfile(path): | |
flist = [path] | |
else: | |
# only search for h5 files in the path provided | |
flist = glob.glob(os.path.join(path,'*.h5')) | |
if verbose: | |
print('\n') | |
t = tqdm.tqdm(total=len(flist)) | |
# loop through the files and do the conversion | |
for i,f in enumerate(flist): | |
if verbose: | |
_, desc = os.path.split(f) | |
t.set_description(desc="Processing {}".format(desc)) | |
gedi_to_vector(f,variables,outFormat,filterBounds) | |
if verbose: | |
t.update(i+1) | |
return | |
if __name__ == "__main__": | |
fire.Fire(main) |
Author
bzgeo
commented
Nov 17, 2021
via email
Hi,
Sorry for the delay in responding. From what you posted, it looks like you
did not specify all of the script's conditions, and that may have been why
it throws an error. The following is an example I use to extract data for
one of my AOIs:
python gedi_shp_v2.py F:\gis\workspace\temp_2020\gedi\bz_gedi02_b\
--variables [latitude_bin0,longitude_bin0,height_bin0] --outFormat shp
--filterBounds [-89.3,15.7,-87.7,18.6] --verbose
Also note that I am pointing to the directory where all of the data are
located, and I had to specify the filterbounds. Can you retry it?
Otherwise, if you can share your file, and your bounds, I can test it for
you.
Best,
Emil
*Emil A. Cherrington, Ph.D.*
*Research Scientist III*
Earth System Science Center - University of Alabama in Huntsville
National Space Science & Technology Center, Room 1016
320 Sparkman Drive
Huntsville, AL 35805
TEL: (256) 961-7040
***@***.*** / ***@***.***
…On Mon, Nov 15, 2021 at 9:03 AM Jesse Smith ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
Trying to run this function in the console as so:
gedi_to_vector("ATL03_20181015152804_02600102_004_01.h5",variables=["latitude","longitude","h_canopy"],outFormat='CSV',filterBounds=None)
and I return this error:
`---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
in
----> 1
gedi_to_vector("ATL03_20181015152804_02600102_004_01.h5",variables=["latitude","longitude","h_canopy"],outFormat='CSV',filterBounds=None)
in gedi_to_vector(file, variables, outFormat, filterBounds)
27 if 'gt1l' in k:
28 # get the geolocation subgroup
---> 29 geo = data[k]['land_segments']
30 d = {}
31 # loop through all of the variables defined earlier
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/h5py/_hl/group.py
in *getitem*(self, name)
303 raise ValueError("Invalid HDF5 object reference")
304 elif isinstance(name, (bytes, str)):
--> 305 oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
306 else:
307 raise TypeError("Accessing a group is done with bytes or str, "
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
h5py/h5o.pyx in h5py.h5o.open()
KeyError: "Unable to open object (object 'land_segments' doesn't exist)"`
—
You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub
<https://gist.github.com/950f3db986b3513311ed42efe2395171#gistcomment-3963033>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AOT54QT3HHIAKAZMDCZMC5LUMEOLFANCNFSM4NNK3RXA>
.
Triage notifications on the go with GitHub Mobile for iOS
<https://apps.apple.com/app/apple-store/id1477376905?ct=notification-email&mt=8&pt=524675>
or Android
<https://play.google.com/store/apps/details?id=com.github.android&referrer=utm_campaign%3Dnotification-email%26utm_medium%3Demail%26utm_source%3Dgithub>.
Hi bzgeo,
I actually managed to get it working, which is why I deleted the comment. Thanks!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment