Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Created January 26, 2024 06:10
Show Gist options
  • Save bennyistanto/a8aec5ce3130b13c48609a43daa8bc57 to your computer and use it in GitHub Desktop.
Save bennyistanto/a8aec5ce3130b13c48609a43daa8bc57 to your computer and use it in GitHub Desktop.
Convert raster GeoTIFF in a folder to single NetCDF file with time dimension enabled that is CF-Compliant
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "005c4a15-9e1e-4ee6-b717-ecd3b6ad5af2",
"metadata": {},
"source": [
"# GeoTIFFs to single NetCDF"
]
},
{
"cell_type": "markdown",
"id": "2af7b498-5754-4c4a-b3c7-791421457e4f",
"metadata": {},
"source": [
"Convert raster GeoTIFF in a folder to single NetCDF file with time dimension enabled that is CF-Compliant\n",
"http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html\n",
" \n",
"Based on Rich Signell's answer on StackExchange: https://gis.stackexchange.com/a/70487\n",
"\n",
"This script was tested using ERA5-Land daily data. Adjustment is needed if using other timesteps data. NCO (http://nco.sourceforge.net), GDAL, NetCDF4, numpy must be installed before using this script.\n",
" \n",
"Modified by\n",
"\n",
"Benny Istanto,</br>\n",
" UN World Food Programme, [email protected] [1]</br>\n",
" GOST/DECAT/DECDG, The World Bank, [email protected] [2]\n",
"\n",
"Log:</br>\n",
"1. 2020-07-05 First draft [1]</br>\n",
"2. 2023-07-15 Prevent the output shifted half-pixel [2]</br>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72964b0e-a85b-420d-b94c-9bde62bf6724",
"metadata": {},
"outputs": [],
"source": [
"#!/usr/bin/env python\n",
"import numpy as np\n",
"import datetime as dt\n",
"import os\n",
"from osgeo import gdal\n",
"gdal.UseExceptions()\n",
"import netCDF4\n",
"import re\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8beeaafb-98a6-4598-84f7-e05ef2dc3935",
"metadata": {},
"outputs": [],
"source": [
"ds = gdal.Open('X:\\\\Temp\\\\era5land\\\\tmean_daily\\\\geotiff\\\\2020\\\\wld_cli_era5land_tmean_20200101.tif')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "963fb826-7c0d-4150-9a47-dd45071222f5",
"metadata": {},
"outputs": [],
"source": [
"a = ds.ReadAsArray()\n",
"nlat,nlon = np.shape(a)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d37022e2-c4f9-4a2f-a1b1-6cb4b8315422",
"metadata": {},
"outputs": [],
"source": [
"b = ds.GetGeoTransform() # bbox, interval\n",
"lon = np.arange(nlon) * b[1] + b[0] + (b[1] / 2) # add half the x pixel size to the lon\n",
"lat = np.arange(nlat) * b[5] + b[3] + (b[5] / 2) # add half the y pixel size to the lat\n",
"lat = np.flipud(lat) # flip the latitudes\n",
"\n",
"basedate = dt.datetime(1981, 1, 1, 0, 0, 0)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbd90b7d-64ad-4cdd-a7da-89603c4986f5",
"metadata": {},
"outputs": [],
"source": [
"# create NetCDF file\n",
"nco = netCDF4.Dataset('X:\\\\Temp\\\\era5land\\\\tmean_daily\\\\wld_cli_era5land_daily_tmean_2020.nc','w',clobber=True)\n",
"\n",
"# chunking is optional, but can improve access a lot: \n",
"# (see: http://www.unidata.ucar.edu/blogs/developer/entry/chunking_data_choosing_shapes)\n",
"chunk_lon=10\n",
"chunk_lat=10\n",
"chunk_time=12\n",
"\n",
"# create dimensions, variables and attributes:\n",
"nco.createDimension('lon',nlon)\n",
"nco.createDimension('lat',nlat)\n",
"nco.createDimension('time',None)\n",
"\n",
"timeo = nco.createVariable('time','f4',('time'))\n",
"timeo.units = 'days since 1981-1-1 00:00:00'\n",
"timeo.standard_name = 'time'\n",
"timeo.calendar = 'gregorian'\n",
"timeo.axis = 'T'\n",
"\n",
"lono = nco.createVariable('lon','f4',('lon'))\n",
"lono.units = 'degrees_east'\n",
"lono.standard_name = 'longitude'\n",
"lono.long_name = 'longitude'\n",
"lono.axis = 'X'\n",
"\n",
"lato = nco.createVariable('lat','f4',('lat'))\n",
"lato.units = 'degrees_north'\n",
"lato.standard_name = 'latitude'\n",
"lato.long_name = 'latitude'\n",
"lato.axis = 'Y'\n",
"\n",
"# create container variable for CRS: lon/lat WGS84 datum\n",
"crso = nco.createVariable('crs','i4')\n",
"crso.long_name = 'Lon/Lat Coords in WGS84'\n",
"crso.grid_mapping_name='latitude_longitude'\n",
"crso.longitude_of_prime_meridian = 0.0\n",
"crso.semi_major_axis = 6378137.0\n",
"crso.inverse_flattening = 298.257223563\n",
"\n",
"# create short integer variable for SPEI data, with chunking\n",
"t2mo = nco.createVariable('tmean', 'f4', ('time', 'lat', 'lon'),\n",
" zlib=True,chunksizes=[chunk_time,chunk_lat,chunk_lon],fill_value=-9999)\n",
"t2mo.units = 'degree_Celsius'\n",
"t2mo.description = 'Temperature of air at 2m above the surface of land, sea or in-land waters.'\n",
"t2mo.standard_name = 'mean_air_temperature'\n",
"t2mo.long_name = 'mean air temperature'\n",
"t2mo.time_step = 'daily'\n",
"t2mo.missing_value = -9999\n",
"t2mo.geospatial_lat_min = -90.0\n",
"t2mo.geospatial_lat_max = 90.0\n",
"t2mo.geospatial_lon_min = -180.0\n",
"t2mo.geospatial_lon_max = 180.0\n",
"t2mo.grid_mapping = 'crs'\n",
"t2mo.set_auto_maskandscale(False)\n",
"\n",
"# Additional attributes\n",
"nco.Conventions='CF-1.6'\n",
"nco.cdm_data_type = \"GRID\"\n",
"nco.title = \"Global daily mean air temperature from ERA5-Land\"\n",
"nco.summary = \"This data originally derived from ERA5-Land Hourly data, extracted into daily aggregate using Google Earth Engine then exported as GeoTIFF file\"\n",
"nco.references = \"Muñoz Sabater, J. (2019): ERA5-Land hourly data from 1950 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: https://doi.org/10.24381/cds.e2161bac\"\n",
"nco.history = \"created by GOST - The World Bank\"\n",
"nco.version = \"Version 2024\"\n",
"nco.comments = \"time variable denotes the first day of the given day\"\n",
"nco.website = \"https://www.worldbank.org\"\n",
"nco.date_created = \"2024-01-17\"\n",
"nco.creator_name = \"Benny Istanto\"\n",
"nco.creator_role = \"Climate Geographer\"\n",
"nco.creator_email = \"[email protected]\"\n",
"nco.institution = \"The World Bank\"\n",
"nco.note = \"The data is developed to support The World Bank, to assess extreme temperature, hot days and heat as part of Global Monitoring\"\n",
"\n",
"#write lon,lat\n",
"lono[:]=lon\n",
"lato[:]=lat\n",
"\n",
"pat = re.compile(r'wld_cli_era5land_tmean_([0-9]{8})\\.tif$') # raw string for regular expression\n",
"\n",
"itime = 0\n",
"\n",
"# Step through data, writing time and data to NetCDF\n",
"for root, dirs, files in os.walk('X:\\\\Temp\\\\era5land\\\\tmean_daily\\\\geotiff\\\\2020'):\n",
" dirs.sort()\n",
" files.sort()\n",
" for f in files:\n",
" match = re.match(pat, f)\n",
" if match:\n",
" date_str = match.group(1) # This will give you '19810101'\n",
" date = dt.datetime.strptime(date_str, '%Y%m%d') # Convert to datetime\n",
" print(date)\n",
" dtime = (date - basedate).total_seconds()/86400.\n",
" timeo[itime] = dtime\n",
" # Temperature\n",
" t2m_path = os.path.join(root,f)\n",
" print(t2m_path)\n",
" t2m = gdal.Open(t2m_path)\n",
" a = t2m.ReadAsArray() # data\n",
" t2mo[itime,:,:] = np.flipud(a) # flip the data in y-direction\n",
" itime = itime + 1\n",
"\n",
"nco.close()\n",
"\n",
"print('Completed!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e72ba42-fc68-4c3b-9f73-4c6941304e5e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment