Emile Tenezakis emileten

Data Engineer/Developer

emileten / xarray-openmfdataset-expand-assign-coords.py

Created April 21, 2022 23:11

Open multiple netCDF datasets with xarray, automatically modify each one (add a dimension with coordinates based on the file path), and then combine them.

	### This works #####

	# Pattern of the files to open; '...' is a placeholder to fill in
	# (presumably a glob over netCDF file paths — confirm against its use).
	FILEPATTERN = '...'
	def func(ds):
	    """Tag a dataset with the path of the file it was read from.

	    The path is read from the ``source`` entry in the encoding of the
	    first variable obtained by iterating over ``ds``. It is attached as a
	    scalar ``path`` coordinate, which is then promoted to a new
	    length-one ``path`` dimension so that datasets coming from different
	    files can later be combined along ``path``.

	    NOTE(review): presumably meant to be passed as the ``preprocess``
	    callback of ``xarray.open_mfdataset`` — confirm against the caller.

	    Parameters
	    ----------
	    ds : xarray.Dataset
	        Dataset whose first variable carries ``encoding['source']``.

	    Returns
	    -------
	    xarray.Dataset
	        A new dataset with an extra ``path`` dimension and coordinate.

	    Raises
	    ------
	    ValueError
	        If ``ds`` yields no variable to read the source path from.
	    KeyError
	        If the variable's encoding has no ``'source'`` entry.
	    """
	    # Any variable will do: they all come from the same file.
	    var = next(iter(ds), None)
	    if var is None:
	        raise ValueError(
	            "cannot determine the source path of a dataset without variables"
	        )
	    fp = ds[var].encoding['source']
	    # assign_coords adds a scalar coordinate; expand_dims turns that
	    # scalar coordinate into a dimension of length one.
	    return ds.assign_coords(path=fp).expand_dims('path')
	# fsspec filesystem object for the 'gs' protocol (Google Cloud Storage).
	fs = fsspec.filesystem('gs')

emileten / dict-to-and-from-yaml.py

Created February 25, 2022 01:24

dict-to-and-from-yaml

	from yaml import load, dump, Loader

	# Hard-coded absolute path of the YAML file to write.
	fp = '/Users/emile/Desktop/testyaml.yaml'
	# Dictionary to serialize.
	out = {
	    'a': 1,
	}
	# Serialize `out` as YAML into the file at `fp`. The encoding is given
	# explicitly so the result does not depend on the platform default.
	with open(fp, 'w', encoding='utf-8') as f:
	    dump(out, f)

	with open(fp, 'r') as f:

emileten / impose-cell-specific-max.py

Created February 8, 2022 04:32

Impose a cell-specific temporal cap on a [time, lon, lat] xarray dataset.

	import xarray as xr
	import numpy as np

	### Fake data ###

	def spatio_temporal_gcm_factory(
	x=np.random.rand(1, 361, 721),
	start_date="1995-01-01",
	lat=np.arange(-90, 90.5, 0.5),
	lon=np.arange(-180, 180.5, 0.5),

emileten / replace-low-values-by-sample-from-uniform.py

Created January 24, 2022 22:14

Replace xarray values conditionally by a set of values sampled from the uniform distribution

 def censor(x):
   """Return ``x``, or a random replacement when ``x`` is below 1.

   Values that are not strictly less than 1 are returned unchanged.
   Any other value is replaced by a fresh draw from the uniform
   distribution on [0.5, 1).
   """
   # Guard clause: nothing to do for values that are not below the threshold.
   if not x < 1:
     return x
   return np.random.uniform(low=0.5, high=1)
 # Element-wise version of `censor`, built with np.vectorize so it can be
 # called on whole arrays instead of single scalars.
 vcensor = np.vectorize(censor)
 # Apply the vectorized function to `ds` through xarray, asking for
 # dask-parallelized execution. NOTE(review): `ds` is defined outside this
 # snippet — presumably an xarray object, possibly dask-backed; confirm.
 ds_corrected = xr.apply_ufunc(vcensor, ds, dask='parallelized') # open question: should the data be loaded beforehand instead?

emileten / quantiles_extrapolation_QDM_xclim

Created January 12, 2022 01:50

	import numpy as np
	import xarray as xr
	from xclim import sdba

	def _datafactory(
	x, start_time="1950-01-01", variable_name="fakevariable", lon=1.0, lat=1.0
	):
	"""Populate xr.Dataset with synthetic data for testing"""
	start_time = str(start_time)
	if x.ndim != 1: