Skip to content

Instantly share code, notes, and snippets.

@monocongo
Created June 7, 2016 15:25
Show Gist options
  • Save monocongo/233abf7bcf423d6d628f9379663d3ca2 to your computer and use it in GitHub Desktop.
Save monocongo/233abf7bcf423d6d628f9379663d3ca2 to your computer and use it in GitHub Desktop.
Example usage of xarray's split-apply-combine, using stack/groupby/apply/unstack
from __future__ import division
import logging
import numpy as np
import sys
import xarray
# Configure the root logger once at import time: DEBUG threshold, with each
# record prefixed by a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.DEBUG,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
#----------------------------------------------------------------------------------------------------------------------
def double_data(input_data):
    """Return *input_data* multiplied by two.

    This is a placeholder computation used to demonstrate the
    groupby/apply workflow; the input is not modified, the result of
    ``input_data * 2`` is returned as a new value.

    Args:
        input_data: Any object supporting ``* 2`` (in this script, the
            group passed in by ``groupby(...).apply``).

    Returns:
        The result of ``input_data * 2``.
    """
    return input_data * 2
#----------------------------------------------------------------------------------------------------------------------
def main(argv=None):
    """Double every value of a NetCDF dataset and write the result to a new file.

    Demonstrates xarray's split-apply-combine workflow: the 'lon' and 'lat'
    dimensions are stacked into a single 'grid_cells' dimension, double_data()
    is applied to each group, and the result is unstacked again. The 'prcp'
    variable is then renamed to 'dummy' and written to
    ``<output_file_base>_dummy.nc`` encoded as float32 with a NaN fill value.

    Args:
        argv: Argument list laid out like ``sys.argv`` (program name, input
            NetCDF path, output file base name). Defaults to ``sys.argv``
            when None. Arguments beyond the first three are ignored.

    Returns:
        0 on success, 2 when the two required arguments are missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        # report a usage error instead of failing with a bare IndexError
        program = args[0] if args else 'script'
        sys.stderr.write('usage: %s <precip_file> <output_file_base>\n' % program)
        return 2

    precip_file = args[1]
    output_file_base = args[2]

    # name given to the computed variable, also used in the output file name
    variable_name = 'dummy'

    with xarray.open_dataset(precip_file) as dataset:

        # use stack() to collapse lon and lat to a single dimension
        stacked = dataset.stack(grid_cells=('lon', 'lat'))

        # group by grid cell and apply the function
        # NOTE(review): newer xarray releases recommend GroupBy.map over
        # GroupBy.apply -- confirm against the installed version before switching
        doubled = stacked.groupby('grid_cells').apply(double_data)

        # use unstack() to reconstitute the lon & lat dimensions
        unstacked = doubled.unstack('grid_cells')

        # rename the input dataset's "prcp" variable (which now holds the
        # computed values); the former positional ``True`` was the legacy
        # ``inplace`` flag, which is unnecessary because the returned dataset
        # is used, and which current xarray releases no longer accept
        renamed = unstacked.rename({'prcp': variable_name})

        # write the dataset to NetCDF while the source file is still open
        renamed.to_netcdf(output_file_base + '_' + variable_name + '.nc',
                          encoding={variable_name: {'_FillValue': np.nan,
                                                    'dtype': 'float32'}})

    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment