Skip to content

Instantly share code, notes, and snippets.

@ljwolf
Created November 3, 2017 21:13
Show Gist options
  • Save ljwolf/d6c624aa5694635522780470f8530db6 to your computer and use it in GitHub Desktop.
Save ljwolf/d6c624aa5694635522780470f8530db6 to your computer and use it in GitHub Desktop.
import scipy.stats as st
import copy
import numpy as np
import pysal as ps
def _estimate_kdes(y,z):
"""
estimate a kernel desnity for each group in z (classes) from y (data)
"""
z_unique = np.unique(z).tolist()
distributions = []
for i,zval in enumerate(z_unique):
distributions.append(st.gaussian_kde(y[z==zval]))
return distributions
def forward_predict(y,z,P,local=True, interp=True, diff=True):
"""
Predict a Markov chain foward using sub-distribution resampling.
y : np.ndarray
a flat (n,) array containing the observed data in the time period to predict
z : np.ndarray
a flat (n,) array containing the classification of the data in the final time period
P : np.ndarray
a transition matrix estimated from the (y,z) chain.
local: bool
whether or not to divide the innovation distributions by z.
interp: bool
whether or not to sample from the kernel density estimate or the empirical distribution of innovations
diff: bool
whether or not to predict foward using the distribution of difference chain or the direct chain
NOTE: Innovations refer to the stochastic component added to y to move forward in time. Innovations can either be from the
differences between the current time period and the previous time period, or a resampling of the current time period.
"""
P = np.asarray(P)
z_unique = np.unique(z).tolist()
z_new = []
y_new = []
resample = _dispatch_experiment(y,z,local=True, interp=True, diff=False)
for yi,zi in zip(y,z):
zidx = z_unique.index(zi)
draw = st.multinomial.rvs(1, P[zidx])
zidx_new = draw.argmax()
z_new.append(z_unique[zidx_new])
y_new.append(resample(zidx_new))
return np.vstack(y_new).squeeze(), np.vstack(z_new).squeeze(), resample
def _dispatch_experiment(data, classes, local=True, interp=True, diff=True):
if diff:
assert data.shape[1] == 2 #should be 2 columns with N rows
to_resample = difference(data)
else:
to_resample = data
resample_function = RejectionSampler(to_resample,classes,local=True, interp=interp,diff=diff)
return resample_function
class RejectionSampler(object):
def __init__(self, data, classes, local=True, interp=True, diff=False):
self.data = data
self.classifier = ps.Quantiles(data, k=len(np.unique(classes)))
self.is_local = local
self.is_interp = interp
self.is_diff = diff
if self.is_local:
self.unique_classes = np.unique(classes)
self.classes = classes
else:
self.unique_classes = [1]
self.classes = np.ones(data.shape)
if self.is_interp:
create_dist = lambda x: st.gaussian_kde(x)
else:
create_dist = lambda x: x
self.distributions = [create_dist(data[classes==cls])
for cls in self.unique_classes]
def __call__(self, next_class, this_obs=None):
next_candidate_class = None
while True:
if self.is_local:
next_ix = self.unique_classes.tolist().index(next_class)
else:
next_ix = 0
if self.is_interp:
candidate = self.distributions[next_ix].resample(1)
else:
candidate_set = copy.deepcopy(self.distributions[next_ix])
starting_length = len(candidate_set)
candidate_ix = np.random.randint(len(candidate_set))
candidate = candidate_set[candidate_ix]
self.distributions[next_ix] = np.delete(candidate_set, candidate_ix) #need a refresh?? if you generate unsuccessfully, this destructs
if self.is_diff:
assert (this_obs is not None), "If doing difference-sampling, the current observation must be provided."
candidate = candidate + this_obs
next_candidate_class = self.classifier.find_bin(candidate)
if next_candidate_class == next_class:
return candidate
if not self.is_interp:
self.distributions[next_ix] = candidate_set
assert starting_length == len(self.distributions[next_ix]), 'Your reset did not succeed!'
if __name__ == '__main__':
import pandas as pd
import pysal as ps
import matplotlib.pyplot as plt
import seaborn.apionly as sns
usjoin = pd.read_csv(ps.examples.get_path('usjoin.csv'))
y = usjoin.iloc[:,2:].values
pci = y.T
rpci = (pci.T / pci.mean(axis=1))
qpci = np.vstack([ps.Quantiles(year).yb for year in rpci])
mkv = ps.Markov(qpci)
ylast = rpci[:,-2]
qlast = qpci[:,-2]
y_local_kde, z_local_kde,rs_local_kde = forward_predict(ylast,qlast,mkv.p)
y_global_kde, z_global_kde,rs_global_kde = forward_predict(ylast,qlast,mkv.p,
local=False)
y_local_emp, z_local_emp,rs_local_emp = forward_predict(ylast,qlast,mkv.p,
interp=False)
y_global_emp, z_global_emp,rs_global_emp = forward_predict(ylast,qlast,mkv.p,
local=False, interp=False)
y_local_kde_diff, z_local_kde_diff,rs_local_kde_diff = forward_predict(ylast,qlast,mkv.p,
diff=True)
y_global_kde_diff, z_local_kde_diff,rs_local_kde_diff= forward_predict(ylast,qlast,mkv.p,
local=False, interp=False,
diff=True)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment