Skip to content

Instantly share code, notes, and snippets.

@arbennett
Created June 27, 2017 18:11
Show Gist options
  • Save arbennett/6185add8773d312a2802614fca5e9527 to your computer and use it in GitHub Desktop.
Save arbennett/6185add8773d312a2802614fca5e9527 to your computer and use it in GitHub Desktop.
Workflow for uploading data from netcdf to google earth engine
#!/usr/bin/env python2
"""
Script that allows for conversion of a netcdf file to
a stack of geotiff files with each raster being a time
slice from the input file.
Base implementation taken from:
https://www.linkedin.com/pulse/convert-netcdf4-file-geotiff-using-python-chonghua-yin
Usage:
./nc_to_gtiff.py varname infile.nc outdir
"""
import os
import sys
from osgeo import gdal, osr, gdal_array
import xarray as xr
import numpy as np
import pandas as pd
def get_netcdf_info(fname, var):
subset = 'NETCDF:"' + fname + '":' + var
sub_ds = gdal.Open(subset)
nodata = sub_ds.GetRasterBand(1).GetNoDataValue()
xsize = sub_ds.RasterXSize
ysize = sub_ds.RasterYSize
geot = sub_ds.GetGeoTransform()
proj = osr.SpatialReference()
proj.SetWellKnownGeogCS('NAD27')
data = xr.open_dataset(fname)[var]
data = np.ma.masked_array(data, mask=(data == nodata), fill_value=nodata)
return nodata, xsize, ysize, geot, proj, data
def create_geotiff(suffix, arr, ndv, xsize, ysize, geot, proj):
dt = gdal_array.NumericTypeCodeToGDALTypeCode(arr.dtype)
if type(dt) != np.int:
if dt.startswith('gdal.GDT_') is False:
dt = eval('gdal.GDT_'+dt)
new_fname = suffix + '.tif'
zsize = 1
driver = gdal.GetDriverByName('GTiff')
arr[np.isnan(arr)] = ndv
ds = driver.Create(new_fname, xsize, ysize, zsize, dt)
ds.SetProjection(proj.ExportToWkt())
ds.SetGeoTransform(geot)
ds.GetRasterBand(1).WriteArray(arr)
ds.GetRasterBand(1).SetNoDataValue(ndv)
ds.FlushCache()
return new_fname
if __name__ == '__main__':
var, infile, outdir = sys.argv[1:]
if os.path.exists(outdir) and os.path.isfile(outdir):
exit("Output path exists and is a regular file! \n"
"Please provide a new output directory and try again")
if not os.path.exists(outdir):
os.mkdir(outdir)
ds = xr.open_dataset(infile)
dates = ds.time.values
ndv, xs, ys, geot, proj, data = get_netcdf_info(infile, var)
for i, d in enumerate(data):
date = pd.to_datetime(str(dates[i])).strftime('%Y_%m_%d')
create_geotiff('{}{}out_{}_{}'.format(outdir, os.path.sep, var, date),
d, ndv, xs, ys, geot, proj)
if i == 10:
break
#!/usr/bin/env bash
#
# Uploads some files to google cloud storage
#
# -----------------------------------------
# WARNING: This will make everything in the
# output bucket public!!!
# -----------------------------------------
#
# Usage:
# ./transfer_to_gcs.sh src_dir dest_bucket
gsutil cp $1/* gs://$2
gsutil -m acl set -R -a public-read gs://$2
#!/usr/bin/env bash
# Uploads some files from google cloud storage to gee
# Usage:
# ./transfer_to_gee.sh src_bucket dest_asset
result=`earthengine create collection users/$2`
if `test -z "$result"`; then
echo $result
exit 1
fi
for geotiff in `gsutil ls gs://$1/*.tif`; do
filename=`basename $geotiff`
asset_id="${filename%.*}"
earthengine upload image --asset_id=users/$2/$asset_id $geotiff
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment