Created
November 3, 2017 21:13
-
-
Save ljwolf/d6c624aa5694635522780470f8530db6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scipy.stats as st | |
import copy | |
import numpy as np | |
import pysal as ps | |
def _estimate_kdes(y,z): | |
""" | |
estimate a kernel desnity for each group in z (classes) from y (data) | |
""" | |
z_unique = np.unique(z).tolist() | |
distributions = [] | |
for i,zval in enumerate(z_unique): | |
distributions.append(st.gaussian_kde(y[z==zval])) | |
return distributions | |
def forward_predict(y,z,P,local=True, interp=True, diff=True): | |
""" | |
Predict a Markov chain foward using sub-distribution resampling. | |
y : np.ndarray | |
a flat (n,) array containing the observed data in the time period to predict | |
z : np.ndarray | |
a flat (n,) array containing the classification of the data in the final time period | |
P : np.ndarray | |
a transition matrix estimated from the (y,z) chain. | |
local: bool | |
whether or not to divide the innovation distributions by z. | |
interp: bool | |
whether or not to sample from the kernel density estimate or the empirical distribution of innovations | |
diff: bool | |
whether or not to predict foward using the distribution of difference chain or the direct chain | |
NOTE: Innovations refer to the stochastic component added to y to move forward in time. Innovations can either be from the | |
differences between the current time period and the previous time period, or a resampling of the current time period. | |
""" | |
P = np.asarray(P) | |
z_unique = np.unique(z).tolist() | |
z_new = [] | |
y_new = [] | |
resample = _dispatch_experiment(y,z,local=True, interp=True, diff=False) | |
for yi,zi in zip(y,z): | |
zidx = z_unique.index(zi) | |
draw = st.multinomial.rvs(1, P[zidx]) | |
zidx_new = draw.argmax() | |
z_new.append(z_unique[zidx_new]) | |
y_new.append(resample(zidx_new)) | |
return np.vstack(y_new).squeeze(), np.vstack(z_new).squeeze(), resample | |
def _dispatch_experiment(data, classes, local=True, interp=True, diff=True): | |
if diff: | |
assert data.shape[1] == 2 #should be 2 columns with N rows | |
to_resample = difference(data) | |
else: | |
to_resample = data | |
resample_function = RejectionSampler(to_resample,classes,local=True, interp=interp,diff=diff) | |
return resample_function | |
class RejectionSampler(object): | |
def __init__(self, data, classes, local=True, interp=True, diff=False): | |
self.data = data | |
self.classifier = ps.Quantiles(data, k=len(np.unique(classes))) | |
self.is_local = local | |
self.is_interp = interp | |
self.is_diff = diff | |
if self.is_local: | |
self.unique_classes = np.unique(classes) | |
self.classes = classes | |
else: | |
self.unique_classes = [1] | |
self.classes = np.ones(data.shape) | |
if self.is_interp: | |
create_dist = lambda x: st.gaussian_kde(x) | |
else: | |
create_dist = lambda x: x | |
self.distributions = [create_dist(data[classes==cls]) | |
for cls in self.unique_classes] | |
def __call__(self, next_class, this_obs=None): | |
next_candidate_class = None | |
while True: | |
if self.is_local: | |
next_ix = self.unique_classes.tolist().index(next_class) | |
else: | |
next_ix = 0 | |
if self.is_interp: | |
candidate = self.distributions[next_ix].resample(1) | |
else: | |
candidate_set = copy.deepcopy(self.distributions[next_ix]) | |
starting_length = len(candidate_set) | |
candidate_ix = np.random.randint(len(candidate_set)) | |
candidate = candidate_set[candidate_ix] | |
self.distributions[next_ix] = np.delete(candidate_set, candidate_ix) #need a refresh?? if you generate unsuccessfully, this destructs | |
if self.is_diff: | |
assert (this_obs is not None), "If doing difference-sampling, the current observation must be provided." | |
candidate = candidate + this_obs | |
next_candidate_class = self.classifier.find_bin(candidate) | |
if next_candidate_class == next_class: | |
return candidate | |
if not self.is_interp: | |
self.distributions[next_ix] = candidate_set | |
assert starting_length == len(self.distributions[next_ix]), 'Your reset did not succeed!' | |
if __name__ == '__main__': | |
import pandas as pd | |
import pysal as ps | |
import matplotlib.pyplot as plt | |
import seaborn.apionly as sns | |
usjoin = pd.read_csv(ps.examples.get_path('usjoin.csv')) | |
y = usjoin.iloc[:,2:].values | |
pci = y.T | |
rpci = (pci.T / pci.mean(axis=1)) | |
qpci = np.vstack([ps.Quantiles(year).yb for year in rpci]) | |
mkv = ps.Markov(qpci) | |
ylast = rpci[:,-2] | |
qlast = qpci[:,-2] | |
y_local_kde, z_local_kde,rs_local_kde = forward_predict(ylast,qlast,mkv.p) | |
y_global_kde, z_global_kde,rs_global_kde = forward_predict(ylast,qlast,mkv.p, | |
local=False) | |
y_local_emp, z_local_emp,rs_local_emp = forward_predict(ylast,qlast,mkv.p, | |
interp=False) | |
y_global_emp, z_global_emp,rs_global_emp = forward_predict(ylast,qlast,mkv.p, | |
local=False, interp=False) | |
y_local_kde_diff, z_local_kde_diff,rs_local_kde_diff = forward_predict(ylast,qlast,mkv.p, | |
diff=True) | |
y_global_kde_diff, z_local_kde_diff,rs_local_kde_diff= forward_predict(ylast,qlast,mkv.p, | |
local=False, interp=False, | |
diff=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment