-
-
Save KMarkert/c68ccf53260d7b775b836bf2e11e2ec3 to your computer and use it in GitHub Desktop.
import os | |
import fire | |
import h5py | |
import glob | |
import tqdm | |
import numpy as np | |
import pandas as pd | |
import geopandas as gpd | |
# requires fire, h5py, tqdm, numpy, pandas, and geopandas | |
def gedi_to_vector(file,variables=None,outFormat='CSV',filterBounds=None): | |
# open hdf5 file | |
data = h5py.File(file,'r') | |
# get full file name and extension | |
name,_ = os.path.splitext(file) | |
# create empty dataframe to append data to | |
df = pd.DataFrame() | |
# loop over all of the hdf5 groups | |
for k in list(data.keys()): | |
# if BEAM in the group name | |
if 'BEAM' in k: | |
# get the geolocation subgroup | |
geo = data[k]['geolocation'] | |
d = {} | |
# loop through all of the variables defined earlier | |
for var in variables: | |
# assign variable array to dict key | |
d[var] = np.array(geo[var]) | |
# convert dict of varaibles to dataframe | |
tdf = pd.DataFrame(d) | |
# concat to larger dataframe | |
df = pd.concat([df,tdf],axis=0,sort=False) | |
# check if the the filterBounds is provided | |
if filterBounds is not None: | |
w,s,e,n = filterBounds # expand list to individual variables | |
# select features on X axis | |
horizontalMask = (df.longitude_bin0 >= w) & (df.longitude_bin0 <= e) | |
# select features on Y axis | |
verticalMask = (df.latitude_bin0 >= s) & (df.latitude_bin0 <= n) | |
# combines masks to select features that intersect | |
spatialMask = verticalMask & horizontalMask | |
# grab only masked values within the bounds provided | |
df = df.loc[spatialMask] | |
# check to make sure that the dataframe has values. | |
# if not, then return from function without saving df | |
if df.size == 0: | |
return | |
if outFormat in ['CSV','csv']: | |
# save dataframe of parsed variables to CSV file | |
df.to_csv('{}.{}'.format(name,outFormat.lower()),index=False) | |
else: | |
# check if df has the geoinformation | |
if ('latitude_bin0' not in df.columns) or ('longitude_bin0' not in df.columns): | |
raise KeyError("Geospatial variables 'latitude_bin0' and/or 'longitude_bin0' were not found, " | |
"please specify these variables to be extracted when writing to geospatial format") | |
# convert to geodataframe | |
gdf = gpd.GeoDataFrame( | |
df, geometry=gpd.points_from_xy(df.longitude_bin0, df.latitude_bin0)) | |
# save the geodataframe of variables to file | |
gdf.to_file('{}.{}'.format(name,outFormat.lower())) | |
return | |
def main(path,variables=None,verbose=False,outFormat='CSV',filterBounds=None): | |
# check if the variables to extract have been defined | |
if variables is None: | |
raise ValueError("Please provide variables from the GEDI file to convert") | |
# if variables have been defined, check if provided in correct datetype | |
if type(variables) is not list: | |
raise TypeError("Provided variables is not list, please provide argument as '[<var1>,<var2>,<var3>]'") | |
# check if filterBounds have been provided and in correct datatype | |
if (filterBounds is not None) and (type(filterBounds) is not list): | |
raise TypeError("Provided filterBounds is not list, please provide argument as '[W,S,E,N]'") | |
# check if the output format provided is supported by script | |
availableFormats = ['CSV','SHP','GeoJSON','GPKG','csv','shp','geojson','gpkg'] | |
if outFormat not in availableFormats: | |
raise NotImplementedError('Selected output format is not support please select one of the following: "CSV","SHP","GeoJSON","GPKG"') | |
# check if path provided is a file or folder | |
if os.path.isfile(path): | |
flist = [path] | |
else: | |
# only search for h5 files in the path provided | |
flist = glob.glob(os.path.join(path,'*.h5')) | |
if verbose: | |
print('\n') | |
t = tqdm.tqdm(total=len(flist)) | |
# loop through the files and do the conversion | |
for i,f in enumerate(flist): | |
if verbose: | |
_, desc = os.path.split(f) | |
t.set_description(desc="Processing {}".format(desc)) | |
gedi_to_vector(f,variables,outFormat,filterBounds) | |
if verbose: | |
t.update(i+1) | |
return | |
if __name__ == "__main__": | |
fire.Fire(main) |
Wonderful! Thanks for your quick response. It works.
Any thoughts on how to extract the 2-dimensional variables cover_z, pai_z and pavd_z for z = 0 - 29?
@ovanlier that would require a change to the code where at lines 26-31 it is only searching through the geolocation
subgroup but would need to expand the search (or allow a search) to all subgroups within the file.
I will try to update the script soon (hopefully today) where you would not have to explicitly call for the latitude_bin0
and longitude_bin0
variables and extract any variable.
@KMarkert thank you for your reply.
The fork submitted by rbavery (found here) allows for extracting variables outside the geolocation subgroup.
Most variables are 1-dimensional, however, variables like cover_z, pai_z and pavd_z are 2-dimensional. The issue lies in attempting to extract the 2D array variables. For example, the 2D variable cover_z has a 1D array for every z for z = 0, 1, 2, ..., 29.
It would be nice to extract cover_z for all z (i.e., cover_0, cover_1, cover_2, ..., cover_29).
Oh, thanks for letting me know! Probably saved me a bunch of time figuring that the data is 2D...the 2D format should be fun to extract but doable. I will have a look and update when I have something.
Great job K.! keep up the good work.
Impressive R package for all your GEDI processing: https://github.com/carlos-alberto-silva/rGEDI
@ovanlier, agreed. There are some nice packages coming out to interface with the GEDI data. Here is a Python package too: https://github.com/EduinHSERNA/pyGEDI
I looked into the Python package and it seems that there is the ability to extract out data, plot, and do whatever else is needed. So, it looks like this script may be obsolete....I would suggest using the packages for analysis since they are maintained and have more features.
I also had the same problem the first time I used this code. The longtitude_bin0 and latitude_bin should be included intovariable.
variables=['height_bin0', 'longtitude_bin0', 'latitude_bin'].