Simulating drift processes on the GPU using PyCuda
from __future__ import division

import pycuda.compiler
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import pycuda.curandom
from pycuda.cumath import exp as pycuda_exp
from pycuda.compiler import SourceModule

import matplotlib.pyplot as plt

from kabuki.utils import scipy_stochastic

import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm

import hddm
code = """ | |
#include <curand_kernel.h> | |
extern "C" | |
{ | |
__global__ void sim_drift(curandState *global_state, float const v, float const V, float const a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, float *out) | |
{ | |
float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time; | |
int idx = blockIdx.x*blockDim.x + threadIdx.x; | |
curandState local_state = global_state[idx]; | |
/* Sample variability parameters. */ | |
start_delay = curand_uniform(&local_state)*T + (t-T/2); | |
start_point = (curand_uniform(&local_state)*Z + (z-Z/2))*a; | |
drift_rate = curand_normal(&local_state)*V + v; | |
/* Set up drift variables. */ | |
prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate); | |
step_size = sqrtf(dt)*intra_sv; | |
time = start_delay; | |
position = start_point; | |
/* Simulate particle movement until threshold is crossed. */ | |
while (position > 0 & position < a) { | |
rand = curand_uniform(&local_state); | |
position += ((rand < prob_up)*2 - 1) * step_size; | |
time += dt; | |
} | |
/* Save back state. */ | |
global_state[idx] = local_state; | |
/* Figure out boundary, save result. */ | |
if (position <= 0) { | |
out[idx] = -time; | |
} | |
else { | |
out[idx] = time; | |
} | |
} | |
    __global__ void sim_drift_var_thresh(curandState *global_state, float const v, float const V, float const *a, float const z, float const Z, float const t, float const T, float const dt, float const intra_sv, float const max_time, int const n, float *out)
    {
        float start_delay, start_point, drift_rate, rand, prob_up, position, step_size, time;
        int idx = blockIdx.x*blockDim.x + threadIdx.x;
        int x_pos = 0;
        /* a holds the threshold value at each time step; it has max_time/dt entries. */
        int a_len = (int)(max_time / dt);
        if (idx >= n)
            return;
        curandState local_state = global_state[idx];

        start_delay = curand_uniform(&local_state)*T + (t-T/2);
        start_point = curand_uniform(&local_state)*Z + (z-Z/2);
        drift_rate = curand_normal(&local_state)*V + v;

        prob_up = .5f*(1+sqrtf(dt)/intra_sv*drift_rate);
        step_size = sqrtf(dt)*intra_sv;
        time = 0;
        position = start_point;

        /* Walk until the collapsing threshold is crossed or time runs out;
           the x_pos bound also keeps the threshold lookup inside the array. */
        while (x_pos < a_len && fabsf(position) < a[x_pos]) {
            rand = curand_uniform(&local_state);
            position += ((rand < prob_up)*2 - 1) * step_size;
            time += dt;
            x_pos++;
        }
        time += start_delay;

        global_state[idx] = local_state;

        if (position <= 0) {
            out[idx] = -time;
        }
        else {
            out[idx] = time;
        }
    }
    __global__ void fill_normal(curandState *global_state, int const n, float *out)
    {
        int idx = blockIdx.x*blockDim.x + threadIdx.x;
        if (idx >= n)
            return;
        curandState local_state = global_state[idx];
        out[idx] = curand_normal(&local_state);
        global_state[idx] = local_state;
    }
    /* __global__ void sim_drift_switch(curandState *global_state, float const vpp, float const vcc, float const a, float const z, float const t, float const tcc, float const dt, float const intra_sv, float *out)
    {
        float start_delay, start_point, rand, prob_up_pp, prob_up_cc, position, step_size, time;
        int idx = blockIdx.x*blockDim.x + threadIdx.x;
        curandState local_state = global_state[idx];

        start_delay = t;
        start_point = z;

        prob_up_pp = .5f*(1+sqrtf(dt)/intra_sv*vpp);
        prob_up_cc = .5f*(1+sqrtf(dt)/intra_sv*vcc);
        step_size = sqrtf(dt)*intra_sv;
        time = 0;
        position = start_point;

        while (fabsf(position) < a) {
            rand = curand_uniform(&local_state);
            if (time < tcc) {
                position += ((rand < prob_up_pp)*2 - 1) * step_size;
            }
            else {
                position += ((rand < prob_up_cc)*2 - 1) * step_size;
            }
            time += dt;
        }
        time += start_delay;

        global_state[idx] = local_state;

        if (position <= 0) {
            out[idx] = -time;
        }
        else {
            out[idx] = time;
        }
    }
    */
}
"""
mod = SourceModule(code, keep=False, no_extern_c=True)

_size = 512
_sim_drift_cuda = mod.get_function("sim_drift")
_sim_drift_var_thresh_cuda = mod.get_function("sim_drift_var_thresh")
fill_normal = mod.get_function("fill_normal")

_generator = None
_thresh = []
_thresh_gpu = None
_out = None
def sim_drift(v, V, a, z, Z, t, T, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _out, _size
    size = np.long(size)
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size // max_size > 1:
            print "Warning: more threads requested than RNG generators per block."
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)

    # Round the grid up to whole blocks; surplus threads exit inside the kernel.
    _sim_drift_cuda(_generator.state, np.float32(v), np.float32(V), np.float32(a), np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.int32(size), _out, block=(64, 1, 1), grid=((size + 63) // 64, 1))

    if return_gpu:
        return _out
    else:
        return _out.get()
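# For reference, a minimal CPU sketch of the scheme the sim_drift kernel
# implements: a random walk with step +/- sqrt(dt)*intra_sv and up-probability
# .5*(1 + sqrt(dt)/intra_sv*drift_rate). This NumPy helper (sim_drift_cpu) is
# an addition for illustration, not part of the original GPU path; it is handy
# for checking the kernel's output distribution on small sizes.
def sim_drift_cpu(v, V, a, z, Z, t, T, size=512, dt=1e-4, intra_sv=1.):
    start_delay = np.random.uniform(size=size) * T + (t - T / 2)
    position = (np.random.uniform(size=size) * Z + (z - Z / 2)) * a
    drift_rate = np.random.randn(size) * V + v
    prob_up = .5 * (1 + np.sqrt(dt) / intra_sv * drift_rate)
    step_size = np.sqrt(dt) * intra_sv
    time = start_delay.copy()
    active = (position > 0) & (position < a)
    while active.any():
        step = (np.random.uniform(size=size) < prob_up) * 2 - 1
        position[active] += step[active] * step_size
        time[active] += dt
        active &= (position > 0) & (position < a)
    # Signed RT: negative for lower-boundary hits, positive for upper-boundary hits.
    return np.where(position <= 0, -time, time)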
def sim_drift_var_thresh(v, V, a, z, Z, t, T, max_time, size=512, dt=1e-4, update=False, return_gpu=False):
    global _generator, _thresh, _thresh_gpu, _out

    # Init
    if _generator is None or update:
        _generator = pycuda.curandom.XORWOWRandomNumberGenerator()
        max_size = _generator.generators_per_block
        if size // max_size > 1:
            print "Warning: more threads requested than RNG generators per block."
    if _thresh_gpu is None or update or np.any(_thresh != a):
        if isinstance(a, pycuda.gpuarray.GPUArray):
            _thresh_gpu = a
        else:
            _thresh_gpu = gpuarray.to_gpu(a)
    if _out is None or update:
        _out = gpuarray.empty(size, dtype=np.float32)

    _sim_drift_var_thresh_cuda(_generator.state, np.float32(v), np.float32(V), _thresh_gpu, np.float32(z), np.float32(Z), np.float32(t), np.float32(T), np.float32(dt), np.float32(1), np.float32(max_time), np.int32(size), _out, block=(64, 1, 1), grid=((size + 63) // 64, 1))

    if return_gpu:
        return _out
    else:
        return _out.get()
def gen_weibull_gpu(a, k, l, max_time=5, dt=1e-4):
    max_time = np.float32(max_time)
    x = pycuda.gpuarray.arange(0., max_time, dt, dtype=np.float32)
    # Weibull pdf
    thresh_func_gpu = k / l * (x / l)**(k - 1) * pycuda_exp(-(x / l)**k)
    thresh_func_gpu *= a
    return thresh_func_gpu
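# Host-side cross-check for gen_weibull_gpu (an added sanity helper, not in the
# original flow): the same scaled Weibull pdf a * k/l * (x/l)**(k-1) * exp(-(x/l)**k)
# computed in NumPy, so the GPU threshold can be compared via np.allclose(...).
def gen_weibull_cpu(a, k, l, max_time=5, dt=1e-4):
    x = np.arange(0., max_time, dt, dtype=np.float32)
    return a * k / l * (x / l)**(k - 1) * np.exp(-(x / l)**k)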
sum_stats_len = 2 * 10 + 1

def compute_stats_vec(data):
    stats_vec = np.empty(sum_stats_len)
    lower = np.abs(data[data < 0])
    upper = data[data > 0]
    for i, data_resp in enumerate([lower, upper]):
        # mean
        stats_vec[i * 10 + 0] = np.mean(data_resp)
        # std
        stats_vec[i * 10 + 1] = np.std(data_resp)
        # skew
        stats_vec[i * 10 + 2] = sp.stats.skew(data_resp)
        # 7 quantiles (1/8 ... 7/8)
        stats_vec[i * 10 + 3:i * 10 + 10] = sp.stats.mstats.mquantiles(data_resp, np.linspace(0, 1, 8, endpoint=False)[1:])
    # Ratio of upper- to lower-boundary responses.
    stats_vec[-1] = len(upper) / len(lower)
    return stats_vec
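# Quick shape check for compute_stats_vec (a hypothetical helper; signed
# standard-normal draws stand in for real simulated RTs):
def _check_stats_vec():
    fake_rts = np.random.randn(1000)
    vec = compute_stats_vec(fake_rts)
    assert vec.shape == (sum_stats_len,)
    return vec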
def synth_likelihood(data, v, V, a, a_k, a_l, z, Z, t, T, samples=50, drifts_per_sample=512):
    """Compute synthetic likelihood for the collapsing-threshold wfpt."""
    def mv_normal_like(s, mu, cov):
        # Multivariate-normal log-density (up to the constant term), as in
        # Wood (2010): -1/2 (s-mu) cov^-1 (s-mu)' - 1/2 log|cov|.
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))

    max_time = 5.  # matches gen_weibull_gpu's default time window
    summary_vecs = np.empty((samples, sum_stats_len))
    thresh = gen_weibull_gpu(a, a_k, a_l, max_time=max_time)

    for sample in range(samples):
        rts = sim_drift_var_thresh(v, V, thresh, z, Z, t, T, max_time, size=drifts_per_sample)
        summary_vecs[sample, :] = compute_stats_vec(rts)

    summary_data = compute_stats_vec(data)
    mean = np.mean(summary_vecs, axis=0)
    cov = np.cov(summary_vecs.T)

    # Evaluate synth likelihood
    logp = mv_normal_like(summary_data, mean, cov)
    return logp
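# Usage sketch for the synthetic likelihood (hypothetical parameter values;
# requires a CUDA device since both the data and the surrogate datasets are
# simulated on the GPU):
def _demo_synth_likelihood():
    observed = sim_drift(.5, .1, 2., .5, .1, .3, .1, size=512)
    return synth_likelihood(observed, v=.5, V=.1, a=2., a_k=1., a_l=1.,
                            z=0., Z=.1, t=.3, T=.1)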
WienerVarThresh = pm.stochastic_from_dist(name="Wiener synthetic likelihood",
                                          logp=synth_likelihood,
                                          dtype=np.float32,
                                          mv=False)

class wfpt_gpu_gen(hddm.likelihoods.wfpt_gen):
    sampling_method = 'gpu'

    def _rvs(self, v, V, a, z, Z, t, T):
        param_dict = {'v': v, 'z': z, 't': t, 'a': a, 'Z': Z, 'V': V, 'T': T}
        print self._size
        if self.sampling_method == 'gpu':
            # Draw RTs from the GPU in chunks of 100 at a time.
            size = 100
            rts = []
            for i in np.round(np.arange(0, self._size, size)):
                sampled_rts = sim_drift(v, V, a, z, Z, t, T, size=size, dt=self.dt)
                rts.append(sampled_rts)
            sampled_rts = np.concatenate(rts)
        else:
            sampled_rts = hddm.generate.gen_rts(param_dict, method=self.sampling_method, samples=self._size, dt=self.dt)
        return sampled_rts[:self._size]

wfpt_gpu_like = scipy_stochastic(wfpt_gpu_gen, name='wfpt_gpu')
def main():
    thresh_func = gen_weibull_gpu(3, 1, 1)
    max_time = 5.
    dt = 1e-4
    thresh_const = np.ones(int(max_time / dt), dtype=np.float32)
    thresh_func_const = pycuda.gpuarray.to_gpu(thresh_const)
    print thresh_func_const.get()

    plt.plot(np.arange(0, max_time, dt), thresh_func.get())
    plt.plot(np.arange(0, max_time, dt), -thresh_func.get())
    #thresh_func = np.array(a*np.exp(-rate*np.linspace(0, max_time, steps)), dtype=np.float32)

    size = 412
    out = sim_drift_var_thresh(.5, .1, thresh_func, 0., .1, .3, .1, max_time, size=size, update=True)
    plt.figure()
    plt.hist(out, bins=40)

    out = sim_drift(1, .1, 2, .5, .1, .3, .1, size=size)
    plt.figure()
    plt.hist(out, bins=40)
    plt.show()
if __name__ == '__main__':
    size = 512
    generator = pycuda.curandom.XORWOWRandomNumberGenerator()
    max_size = generator.generators_per_block
    out = gpuarray.empty(size, dtype=np.float32)
    fill_normal(generator.state, np.int32(size), out, block=(64, 1, 1), grid=((size + 63) // 64, 1))
    data = out.get()
    print data.mean()
    print data.std()
    if size > max_size:
        print data[:max_size].mean()
        print data[:max_size].std()
    plt.hist(data, bins=50)
    plt.show()

    #main()
    x = np.linspace(-5, 5, 100)
    sampler = wfpt_gpu_like.rv
    params = hddm.generate.gen_rand_params()
    data = sampler.rvs(params['v'], params['V'], params['a'], params['z'],
                       params['Z'], params['t'], params['T'], size=size)
    hist = np.histogram(data, range=(-5, 5), bins=100, density=True)[0]
    cumsum = np.cumsum(data)
    plt.plot(x, sampler.cdf(x, params['v'], params['V'], params['a'], params['z'],
                            params['Z'], params['t'], params['T']))
    plt.plot(x, hist)
    print cumsum.shape
    plt.show()
The gist's second file: the same synthetic-likelihood idea, prototyped on a simple Gaussian toy model.
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats
import pymc as pm

samples = np.random.randn(1000)
def plot_synth_samples(data, lower_samples=10, upper_samples=200, lower_dps=10, upper_dps=200, interval=50):
    x, y = np.meshgrid(np.arange(lower_samples, upper_samples, interval), np.arange(lower_dps, upper_dps, interval))
    #plt.figure()
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            plt.subplot(x.shape[0], x.shape[1], i * x.shape[1] + j + 1)
            if i == x.shape[0] - 1:
                plt.xlabel('%d' % x[i, j])
            if j == 0:
                plt.ylabel('%d' % y[i, j])
            #plot_synth_like(data, samples=np.round(x[i, j]), dataset_samples=np.round(y[i, j]))
def plot_synth_like(data, lower_mu=-1, upper_mu=1, lower_sig=.8, upper_sig=1.5, steps=50, samples=100, dataset_samples=100, plot=True):
    x, y = np.meshgrid(np.linspace(lower_mu, upper_mu, steps), np.linspace(lower_sig, upper_sig, steps))
    z = np.empty_like(x)
    for i in range(len(x)):
        for j in range(len(y)):
            z[i, j] = synth_likelihood(data, x[i, j], y[i, j], samples=samples, dataset_samples=dataset_samples)
    plt.contourf(x, y, z)
def plot_erroranalysis(data_samples=(10,), dataset_samples=(10,), datasets=200):
    x = np.asarray(dataset_samples)
    y = np.empty(x.shape, dtype=float)
    for data_sample in data_samples:
        data = np.random.randn(data_sample)
        for i, dataset_sample in enumerate(dataset_samples):
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 400):
                # Choose two random mu points
                mu1 = 0
                mu2 = (np.random.rand() - .5)
                # Evaluate true likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1.)
                pt2 = pm.normal_like(data, mu=mu2, tau=1.)
                ptr = pt1 / pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synth likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=x[i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=x[i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 / ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            y[i] = np.mean(errors)
        plt.plot(x, y, label='%i' % data_sample)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('MSE')
    plt.legend()
def plot_ratio_analysis(data_samples=(100,), dataset_samples=(100,), datasets=100):
    x, y = np.meshgrid(data_samples, dataset_samples)
    z = np.empty(x.shape, dtype=float)
    for i, data_sample in enumerate(data_samples):
        for j, dataset_sample in enumerate(dataset_samples):
            data = np.random.randn(x[j, i])
            errors = []
            sl_sum = 0
            pt_sum = 0
            for rep in range(1, 200):
                # Choose two random mu points
                mu1 = (np.random.rand() - .5) * 3
                mu2 = (np.random.rand() - .5) * 3
                # Evaluate true likelihood
                pt1 = pm.normal_like(data, mu=mu1, tau=1)
                pt2 = pm.normal_like(data, mu=mu2, tau=1)
                ptr = pt1 / pt2
                pt_sum += pt1
                pt_sum += pt2
                #print ptr
                # Evaluate synth likelihood
                ps1 = synth_likelihood(data, mu1, 1, dataset_samples=y[j, i], samples=datasets)
                ps2 = synth_likelihood(data, mu2, 1, dataset_samples=y[j, i], samples=datasets)
                sl_sum += ps1
                sl_sum += ps2
                pts = ps1 / ps2
                #print pts
                errors.append((pts - ptr)**2)
            print pt_sum
            print sl_sum
            z[j, i] = np.mean(errors)
            print x[j, i], y[j, i], z[j, i]
    print x
    print y
    print z
    cont = plt.contourf(x, y, z)
    plt.colorbar(cont)
    plt.xlabel('Number of samples per dataset')
    plt.ylabel('Size of input data')
def synth_likelihood(data, mu, std, samples=100, dataset_samples=100):
    """Compute the synthetic likelihood (Wood, 2010) for a Gaussian toy model."""
    def mv_normal_like(s, mu, cov):
        # Multivariate-normal log-density (up to the constant term):
        # -1/2 (s-mu) cov^-1 (s-mu)' - 1/2 log|cov|.
        s = np.asmatrix(s)
        mu = np.asmatrix(mu)
        cov = np.asmatrix(cov)
        return float(-.5 * (s - mu) * (cov**-1) * (s - mu).T - .5 * np.log(np.linalg.det(cov)))

    true_sum = np.array((data.mean(), data.std()))  #, np.sum(data), data.var()))
    sum_stats = np.empty((samples, 2))
    for sample in range(samples):
        s = np.random.randn(dataset_samples) * std + mu
        sum_stats[sample, :] = s.mean(), s.std()  #, np.sum(s), s.var()

    mean = np.mean(sum_stats, axis=0)
    cov = np.cov(sum_stats.T)
    # Evaluate synth likelihood
    logp = mv_normal_like(true_sum, mean, cov)
    return logp
synth = pm.stochastic_from_dist(name="Gaussian synthetic likelihood",
                                logp=synth_likelihood,
                                dtype=np.float32,
                                mv=False)

mu = pm.Uniform('mu', lower=-5, upper=5, value=0)
std = pm.Uniform('std', lower=.1, upper=2, value=1)
sl = synth('Synthetic likelihood', value=samples, mu=mu, std=std, observed=True)
#rl = pm.Normal('Regular likelihood', value=samples, mu=mu, tau=std**-2, observed=True)
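# A minimal sketch of actually sampling the model above (assumes the pymc 2.x
# MCMC API; the original gist defines the model but never runs it):
def run_synth_mcmc(iterations=5000, burn=2500):
    M = pm.MCMC([mu, std, sl])
    M.sample(iter=iterations, burn=burn)
    return M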