Created
June 12, 2020 11:51
-
-
Save ljwolf/2da0a8f52bba6dd9fcfcf6faf97b24e7 to your computer and use it in GitHub Desktop.
Losh function by @jeffcsauer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
from scipy import sparse | |
from scipy import stats | |
from sklearn.base import BaseEstimator | |
import pysal.lib as lp | |
class losh(BaseEstimator): | |
"""Local spatial heteroscedasticity (LOSH)""" | |
def __init__(self, connectivity=None, inference=None): | |
""" | |
Initialize a losh estimator | |
Arguments | |
--------- | |
connectivity: scipy.sparse matrix object | |
the connectivity structure describing the relationships | |
between observed units. | |
inference: str | |
describes type of inference to be used. options are | |
"chi-square", "permutation", or "simulation". | |
Attributes | |
---------- | |
Hi: numpy array | |
Array of LOSH values for each spatial unit. | |
ylag: array | |
Spatially lagged y values. | |
yresid: array | |
Spatially lagged residual values. | |
VarHi: array | |
Variance of Hi. | |
pval: numpy array | |
P-values for inference based on either "chi-square", | |
"permutation", or "simulation" approaches. | |
""" | |
self.connectivity = connectivity | |
self.inference = inference | |
def fit(self, y, a=None): | |
""" | |
Arguments | |
--------- | |
y : numpy.ndarray | |
array containing continuous data | |
a : int | |
residual multiplier. Default is 2 in order to generate a | |
variance measure. Users may use 1 for absolute deviations. | |
Returns | |
------- | |
the fitted estimator. | |
Notes | |
----- | |
Technical details and derivations can be found in :cite:`OrdGetis2012`. | |
""" | |
y = np.asarray(y).flatten() | |
w = self.connectivity | |
self.Hi, self.ylag, self.yresid, self.VarHi = self._statistic(y, w, a) | |
if self.inference is None: | |
return self | |
elif self.inference == "chi-square": | |
print("Note: chi-square inference selected. This assumes a=2.") | |
dof = 2/self.VarHi | |
Zi = (2*self.Hi)/self.VarHi | |
self.pval = 1 - stats.chi2.cdf(Zi, dof) | |
return self | |
@staticmethod | |
def _statistic(y, w, a): | |
# Define what type of variance to use | |
if a is None: | |
a = 2 | |
else: | |
a = a | |
Wrs = [round(np.sum(list(w[y].values()))) for y in range(len(y))] | |
# Calculate spatial mean | |
ylag = lp.weights.lag_spatial(w, y)/Wrs | |
# Calculate and adjust residuals based on multiplier | |
yresid = abs(y-ylag)**a | |
# Calculate denominator of Hi calculation | |
# as mean of residuals | |
denom = int(np.mean(yresid)) * np.array(Wrs) | |
# Carry out final $H_{i}$ calculation by dividing | |
# spatial average of residuals by denom | |
Hi = lp.weights.lag_spatial(w, yresid) / denom | |
# Calculate variance | |
n = len(y) | |
# Calculate average of residuals | |
yresid_mean = np.mean(yresid) | |
# Calculate VarHi | |
VarHi = ((n-1)**-1) * \ | |
(denom**-2) * \ | |
((np.sum(yresid**2)/n) - yresid_mean**2) * \ | |
((n*np.array([np.sum(np.array(list(w[y].values()))**2) for y in range(len(y))])) - np.array(Wrs)**2) | |
VarHi | |
return (Hi, ylag, yresid, VarHi) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment