Last active
December 10, 2020 20:06
-
-
Save lukegre/4bb5c483b2d111e52413b260311fbe43 to your computer and use it in GitHub Desktop.
function that takes the trend for an xarray.DataArray over the 'time' variable. Returns the slopes and p-values for each location.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dataset_encoding(xds): | |
cols = ['source', 'original_shape', 'dtype', 'zlib', 'complevel', 'chunksizes'] | |
info = pd.DataFrame(columns=cols, index=xds.data_vars) | |
for row in info.index: | |
var_encoding = xds[row].encoding | |
for col in info.keys(): | |
info.ix[row, col] = var_encoding.pop(col, '') | |
return info | |
def xarray_trend(xarr): | |
from scipy import stats | |
# getting shapes | |
m = np.prod(xarr.shape[1:]).squeeze() | |
n = xarr.shape[0] | |
# creating x and y variables for linear regression | |
x = xarr.time.to_pandas().index.to_julian_date().values[:, None] | |
y = xarr.to_masked_array().reshape(n, -1) | |
# ############################ # | |
# LINEAR REGRESSION DONE BELOW # | |
xm = x.mean(0) # mean | |
ym = y.mean(0) # mean | |
ya = y - ym # anomaly | |
xa = x - xm # anomaly | |
# variance and covariances | |
xss = (xa ** 2).sum(0) / (n - 1) # variance of x (with df as n-1) | |
yss = (ya ** 2).sum(0) / (n - 1) # variance of y (with df as n-1) | |
xys = (xa * ya).sum(0) / (n - 1) # covariance (with df as n-1) | |
# slope and intercept | |
slope = xys / xss | |
intercept = ym - (slope * xm) | |
# statistics about fit | |
df = n - 2 | |
r = xys / (xss * yss)**0.5 | |
t = r * (df / ((1 - r) * (1 + r)))**0.5 | |
p = stats.distributions.t.sf(abs(t), df) | |
# misclaneous additional functions | |
# yhat = dot(x, slope[None]) + intercept | |
# sse = ((yhat - y)**2).sum(0) / (n - 2) # n-2 is df | |
# se = ((1 - r**2) * yss / xss / df)**0.5 | |
# preparing outputs | |
out = xarr[:2].mean('time') | |
# first create variable for slope and adjust meta | |
xarr_slope = out.copy() | |
xarr_slope.name += '_slope' | |
xarr_slope.attrs['units'] = 'units / day' | |
xarr_slope.values = slope.reshape(xarr.shape[1:]) | |
# do the same for the p value | |
xarr_p = out.copy() | |
xarr_p.name += '_Pvalue' | |
xarr_p.attrs['info'] = "If p < 0.05 then the results from 'slope' are significant." | |
xarr_p.values = p.reshape(xarr.shape[1:]) | |
# join these variables | |
xarr_out = xarr_slope.to_dataset(name='slope') | |
xarr_out['pval'] = xarr_p | |
return xarr_out |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment