Skip to content

Instantly share code, notes, and snippets.

@rabernat
Last active March 9, 2018 16:25
Show Gist options
  • Save rabernat/e54755e7de4eb5a93cc4e7f9f903e3cc to your computer and use it in GitHub Desktop.
Save rabernat/e54755e7de4eb5a93cc4e7f9f903e3cc to your computer and use it in GitHub Desktop.
writable MDS store for dask
class writable_mds_store:
    """Write-only store that dumps each assigned block to a flat binary file.

    Instances only implement ``__setitem__``; they are meant to be handed to
    an array's ``store`` method (see the usage example in this file), which
    assigns blocks as ``store[idx] = data``.

    Parameters
    ----------
    prefix : str
        Leading part of every output file name (may include a directory).
    iters : sequence
        Iteration numbers, indexed by the position along the first axis.
    suffix : str, optional
        File-name extension, ``'data'`` by default.
    dtype : str or numpy dtype, optional
        On-disk data type; ``'>f4'`` by default.
    """

    def __init__(self, prefix, iters, suffix='data', dtype='>f4'):
        self.prefix = prefix
        self.iters = iters
        self.suffix = suffix
        self.dtype = dtype

    def __setitem__(self, idx, data):
        """Write ``data`` to ``<prefix>.<iteration, 10 digits>.<suffix>``.

        ``idx`` is a sequence of slices whose first entry is the slice along
        the first (time) axis. It must have no step and span exactly one
        index, otherwise an ``AssertionError`` is raised.

        NOTE: only ``idx[0]`` is consulted. Blocks that share a time index
        but differ along other axes map to the same file name, so each write
        replaces the file produced by the previous one.
        """
        # first slice should be the time index
        tslice = idx[0]
        # make sure it is just one single time slice
        assert tslice.step is None
        assert (tslice.stop - tslice.start) == 1
        n = tslice.start
        fname = '%s.%010d.%s' % (self.prefix, self.iters[n], self.suffix)
        data.astype(self.dtype).tofile(fname)
# Example usage: write every time step of a variable to disk.
# NOTE(review): `m` and `ProgressBar` are defined outside this snippet;
# `m.UVEL_Psi.data` is presumably a dask array and `ProgressBar` presumably
# comes from dask.diagnostics -- confirm against the surrounding session.
outdir = '/vega/physo/users/jb3210/offline_velocities/aviso_DUACS2014_daily_msla/div_corrected/'
uvel_store = writable_mds_store(outdir + 'uvelCorr', m.iter.values)
with ProgressBar():
    m.UVEL_Psi.data.store(uvel_store)
@jklymak
Copy link

jklymak commented Jan 30, 2017

Hi Ryan:

This function "works" for me, but each chunk overwrites the previous one, so that I only get the last chunk on disk. Since tofile doesn't do any fancy indexing, I'm not surprised by this. If you haven't done something else, I'll work on a version that uses f.seek and f.write to create a file with the right gaps, etc.

Thanks, Jody


# Open the 'T' output of iteration 12600 from the current directory, chunked
# so that every block holds a single 'Z' level of a single 'time' step.
data = xmitgcm.open_mdsdataset(
    dirname='./',
    prefix={'T'},
    iters=12600,
    read_grid=True,
    geometry='cartesian',
    endian='<',
    chunks={'Z': 1, 'time': 1},
)

def interpolateAtDepth(T,x0,y0,x,y):
    """Interpolate the horizontal slab ``T[0, 0, :, :]`` onto a new grid.

    Written as a per-block function: elsewhere in this file it is passed to
    ``map_blocks`` with blocks declared as ``(1, 1, ny, nx)``.

    Parameters
    ----------
    T : array
        Block indexed as ``T[0, 0, :, :]``; its transpose is fitted against
        ``(x0, y0)``, so the last axis corresponds to ``x0`` and the
        second-to-last to ``y0``.
    x0, y0 : 1-D sequences
        Coordinates of the source grid, handed to
        ``scipy.interpolate.RectBivariateSpline``.
    x, y : 1-D sequences
        Coordinates of the target grid at which the spline is evaluated.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 1, len(y), len(x))`` holding the interpolated
        values. When the last axis of ``T`` has length 1 or less, a
        ``(1, 1, 1, 1)`` array of ones is returned instead without
        interpolating.
    """
    import scipy.interpolate

    if np.shape(T)[-1] > 1:
        fit = scipy.interpolate.RectBivariateSpline(x0, y0, T[0, 0, :, :].T)
        # fit(x, y) has shape (len(x), len(y)); transpose back to (y, x) and
        # restore the two leading singleton axes so the result has the same
        # rank as the fallback branch below, independent of any globals.
        xout = fit(x, y).T[np.newaxis, np.newaxis, :, :]
    else:
        xout = np.ones((1, 1, 1, 1))
    return xout

# x, y, nx and ny are determined elsewhere; they describe the new grid.
# Apply the interpolation block by block, declaring each output block as
# (1, 1, ny, nx) with the same dtype as the input.
tm = data['T'].data.map_blocks(
    interpolateAtDepth,
    data['XC'].values,
    data['YC'].values,
    x,
    y,
    chunks=(1, 1, ny, nx),
    dtype=data['T'].data.dtype,
)

class writable_mds_store:
    """Assignment-only target that saves every received block to a file.

    The object is passed to an array's ``store`` method (as done below in
    this file), which delivers blocks through ``store[idx] = data``. Files
    are named ``<prefix>.<iteration zero-padded to 10 digits>.<suffix>`` and
    hold the block converted to ``dtype`` as raw binary.
    """

    def __init__(self, prefix, iters, suffix='data', dtype='>f4'):
        """Remember the file-name pieces, iteration list and output dtype."""
        self.prefix, self.iters = prefix, iters
        self.suffix, self.dtype = suffix, dtype

    def __setitem__(self, idx, data):
        """Write ``data`` to the file selected by the leading slice of ``idx``.

        The leading slice is taken to be the time index; it must carry no
        step and cover exactly one position (``AssertionError`` otherwise).
        The remaining entries of ``idx`` are ignored.
        """
        time_index = idx[0]
        assert time_index.step is None
        assert (time_index.stop - time_index.start) == 1
        position = time_index.start
        # The iteration number is looked up by position along the time axis.
        name_parts = (self.prefix, self.iters[position], self.suffix)
        fname = '%s.%010d.%s' % name_parts
        print("Writing %s" % fname)
        converted = data.astype(self.dtype)
        converted.tofile(fname)
        

# Write the interpolated array to disk through the store object; the single
# entry in `iters` is the iteration number used for time index 0.
tnew_store = writable_mds_store(prefix='Tnew', iters=[0.], dtype=tm.dtype)
tm.store(tnew_store)

@jklymak
Copy link

jklymak commented Jan 30, 2017

See my fork for an f.seek implementation that is probably fragile, but works: https://gist.github.com/jklymak/282aa1fece49167af68997bdf2f95780

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment