danstowell · December 19, 2015 02:29
diff --git a/data1d_gchangepoint.data.R b/data1d_gchangepoint.data.R
 D <- 1
 N <- 100
 beta_a <- 0.05
 beta_b <- 0.5
 x <- structure(c(26.9413373923, 29.1350302225, 29.1582008922, 28.7840903488, 24.7588135825, 24.5383857689, 22.0150222148, 34.3357977978, 33.3175453175, 27.0393355201, 29.0391675549, 23.1313991805, 22.26141398, 20.8979942215, 25.4268018477, 29.8337205377, 31.127327082, 24.100751496, 27.5635478812, 23.8609747894, 23.9787664652, 21.6289647513, 26.669515063, 32.5843649319, 25.4968136481, 35.7060523151, 25.5803512966, 41.8961419951, 26.7957989466, 17.9698918602, 33.0624273937, 18.4957449301, 26.8025773082, 26.973815048, 27.8235580668, 20.4345371567, 23.0191935867, 27.3180574495, 35.26315069, 27.3524079853, 26.671244883, 28.1304610893, 25.4276672374, 26.9752666167, 24.0932313213, 22.4464662722, 17.8121045108, 20.2510237823, 29.0263323195, 22.4228546253, 40.6781371183, 39.0103908783, 38.3286502205, 40.3057236764, 38.8028148176, 38.4926162261, 37.8330233658, 41.8476895218, 43.5582415338, 40.9933617032, 41.6652154894, 38.7461368463, 41.6872160115, 39.1353024768, 39.6799895743, 38.0130669247, 40.3167870802, 41.966777727, 39.0184807232, 42.7684980965, 20.6920959805, 29.9048704243, 18.4098454233, 21.9488506938, 18.6524318618, 16.1213098914, 20.6599361337, 13.1551345137, 26.5358791537, 18.5118139236, 12.5105047756, 20.2924726272, 2.89016357801, 18.3037137469, 15.1159489074, 10.3759168919, 11.7802272432, 13.199743196, 13.0996584429, 12.0633674015, 18.7592769154, 22.5640197513, 15.1234925981, 11.7277881134, 18.1334624574, 23.3033279461, 5.78178129213, 17.9468316252, 22.8030632839, 15.6659776065), .Dim=c(100, 1))
 # true_changepoints <- [0, 50, 70]
 # true_means <- [26.64919754508517, 40.61729705175906, 15.772935046942852]
 # true_stdvs <- [4.516041208860003, 1.849860101333368, 6.549061725453291]
diff --git a/gchangepoint.stan b/gchangepoint.stan
 /*
 cd ~/dev/stan/stan
 make ~/git/stored_docs/dev/stan/gchangepoint/gchangepoint

 cd ~/git/stored_docs/dev/stan/gchangepoint/
 ./gchangepoint --data=data1d_gchangepoint.data.R
 ./gchangepoint --data=data1d_gchangepoint.data.R --init=init1d_gchangepoint.data.R
 ./gchangepoint --data=data1d_gchangepoint.data.R --warmup=1000 --iter=20000
 ~/dev/stan/stan/bin/print samples.csv
 ~/dev/stan/stan/bin/print samples.csv | grep changeness | cut -c 25-29 > inferred_changeness.csv
 xdg-open inferred_changeness.csv
 */
 data {
 	int<lower=1> D; // num dims
 	int<lower=0> N; // num observations
 	vector[D] x[N]; // obsns

 	real<lower=0> beta_a; // param for beta prior on changeness
 	real<lower=0> beta_b;  // ditto
 	// - NB in our case we want to encourage sparsity in changeness, and have just a few coefs at the upper end, hence asymmetric
 	// beta possibilities: (0.05, 0.5) v strong bimodal poss too harsh, (0.6, 0.8) gentler but still good lop
 }
 parameters {
 	// here's the crossfade strength per transition
 	real<lower=0,upper=1> changeness[N-1];
 	vector[D] newgen_means[N];
 	vector<lower=0>[D] newgen_stdvs[N];
 }
 model {
 	vector[D] gen_means[N];
 	vector[D] gen_stdvs[N];

 	changeness ~ beta(beta_a, beta_b);

 	// each datum is associated with its own generating gaussian
 	// Common prior for the brandnew (would be nice to write this without looping but stan says no)
 	for (n in 1:N){
 		newgen_means[n] ~ normal(0.0, 100.0);
 		newgen_stdvs[n] ~ exponential(2.0);
 	}

 	// each datum's actual gaussian is some crossfaded mix
 	// x[1] is special, it simply comes from the brandnew
 	gen_means[1] <- newgen_means[1];
 	gen_stdvs[1] <- newgen_stdvs[1];
 	x[1] ~ normal(newgen_means[1], newgen_stdvs[1]);
 	// x[2:] is sampled from a gaussian with parameters crossfaded from the prev and the brandnew
 	for (n in 2:N) {
 		gen_means[n] <- (changeness[n-1] * newgen_means[n]) + ((1.0 - changeness[n-1]) * gen_means[n-1]);
 		gen_stdvs[n] <- (changeness[n-1] * newgen_stdvs[n]) + ((1.0 - changeness[n-1]) * gen_stdvs[n-1]); // to consider: better stdev crossfade
 	}

 	// now the obsns actually get generated
 	for (n in 1:N) {
 		x[n] ~ normal(gen_means[n], gen_stdvs[n]);
 	}
 }

diff --git a/init1d_gchangepoint.data.R b/init1d_gchangepoint.data.R
 changeness <- structure(c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), .Dim=c(99))

diff --git a/makedata.py b/makedata.py
 #!/usr/bin/env python
 """
 python makedata.py > data1d_gchangepoint.data.R
 """

 import numpy as np
 from itertools import chain

 # Simple changepoint model - here with explicitly-chosen changepoints for easy inspection

 n = 100  # num obs
 d = 1
 beta_params = (0.05, 0.5)

 changepoints = [0, 50, 70]  # 0 must always be in here please

 # how to sample the generating mean
 parentgaussian_mean_mean = 30
 parentgaussian_mean_stdv = 10
 # how to sample the generating stdev
 parentgaussian_stdv_mean = 6
 parentgaussian_stdv_stdv = 3

 x = []  # obsns

 means = []
 stdvs = []

 # generate observations
 for i in range(n):
 	if i in changepoints:
 		cur_mean = np.random.normal(parentgaussian_mean_mean, parentgaussian_mean_stdv)
 		cur_stdv = abs(np.random.normal(parentgaussian_stdv_mean, parentgaussian_stdv_stdv))
 		means.append(cur_mean)
 		stdvs.append(cur_stdv)
 	x.append(np.random.normal([cur_mean], [cur_stdv]))

 flat = list(chain.from_iterable(zip(*x)))

 # write R data
 print "D <- %i" % d
 print "N <- %i" % n
 print "beta_a <- %g" % beta_params[0]
 print "beta_b <- %g" % beta_params[1]
 print "x <- structure(c(" +  ", ".join(map(str, flat)) + "), .Dim=c(%i, %i))" % (len(x), d)
 print "# true_changepoints <- %s" % str(changepoints)
 print "# true_means <- %s" % str(means)
 print "# true_stdvs <- %s" % str(stdvs)
	D <- 1
	N <- 100
	beta_a <- 0.05
	beta_b <- 0.5
	x <- structure(c(26.9413373923, 29.1350302225, 29.1582008922, 28.7840903488, 24.7588135825, 24.5383857689, 22.0150222148, 34.3357977978, 33.3175453175, 27.0393355201, 29.0391675549, 23.1313991805, 22.26141398, 20.8979942215, 25.4268018477, 29.8337205377, 31.127327082, 24.100751496, 27.5635478812, 23.8609747894, 23.9787664652, 21.6289647513, 26.669515063, 32.5843649319, 25.4968136481, 35.7060523151, 25.5803512966, 41.8961419951, 26.7957989466, 17.9698918602, 33.0624273937, 18.4957449301, 26.8025773082, 26.973815048, 27.8235580668, 20.4345371567, 23.0191935867, 27.3180574495, 35.26315069, 27.3524079853, 26.671244883, 28.1304610893, 25.4276672374, 26.9752666167, 24.0932313213, 22.4464662722, 17.8121045108, 20.2510237823, 29.0263323195, 22.4228546253, 40.6781371183, 39.0103908783, 38.3286502205, 40.3057236764, 38.8028148176, 38.4926162261, 37.8330233658, 41.8476895218, 43.5582415338, 40.9933617032, 41.6652154894, 38.7461368463, 41.6872160115, 39.1353024768, 39.6799895743, 38.0130669247, 40.3167870802, 41.966777727, 39.0184807232, 42.7684980965, 20.6920959805, 29.9048704243, 18.4098454233, 21.9488506938, 18.6524318618, 16.1213098914, 20.6599361337, 13.1551345137, 26.5358791537, 18.5118139236, 12.5105047756, 20.2924726272, 2.89016357801, 18.3037137469, 15.1159489074, 10.3759168919, 11.7802272432, 13.199743196, 13.0996584429, 12.0633674015, 18.7592769154, 22.5640197513, 15.1234925981, 11.7277881134, 18.1334624574, 23.3033279461, 5.78178129213, 17.9468316252, 22.8030632839, 15.6659776065), .Dim=c(100, 1))
	# true_changepoints <- [0, 50, 70]
	# true_means <- [26.64919754508517, 40.61729705175906, 15.772935046942852]
	# true_stdvs <- [4.516041208860003, 1.849860101333368, 6.549061725453291]
	/*
	cd ~/dev/stan/stan
	make ~/git/stored_docs/dev/stan/gchangepoint/gchangepoint

	cd ~/git/stored_docs/dev/stan/gchangepoint/
	./gchangepoint --data=data1d_gchangepoint.data.R
	./gchangepoint --data=data1d_gchangepoint.data.R --init=init1d_gchangepoint.data.R
	./gchangepoint --data=data1d_gchangepoint.data.R --warmup=1000 --iter=20000
	~/dev/stan/stan/bin/print samples.csv
	~/dev/stan/stan/bin/print samples.csv \| grep changeness \| cut -c 25-29 > inferred_changeness.csv
	xdg-open inferred_changeness.csv
	*/
	data {
	int<lower=1> D; // num dims
	int<lower=0> N; // num observations
	vector[D] x[N]; // obsns

	real<lower=0> beta_a; // param for beta prior on changeness
	real<lower=0> beta_b; // ditto
	// - NB in our case we want to encourage sparsity in changeness, and have just a few coefs at the upper end, hence asymmetric
	// beta possibilities: (0.05, 0.5) v strong bimodal poss too harsh, (0.6, 0.8) gentler but still good lop
	}
	parameters {
	// here's the crossfade strength per transition
	real<lower=0,upper=1> changeness[N-1];
	vector[D] newgen_means[N];
	vector<lower=0>[D] newgen_stdvs[N];
	}
	model {
	vector[D] gen_means[N];
	vector[D] gen_stdvs[N];

	changeness ~ beta(beta_a, beta_b);

	// each datum is associated with its own generating gaussian
	// Common prior for the brandnew (would be nice to write this without looping but stan says no)
	for (n in 1:N){
	newgen_means[n] ~ normal(0.0, 100.0);
	newgen_stdvs[n] ~ exponential(2.0);
	}

	// each datum's actual gaussian is some crossfaded mix
	// x[1] is special, it simply comes from the brandnew
	gen_means[1] <- newgen_means[1];
	gen_stdvs[1] <- newgen_stdvs[1];
	x[1] ~ normal(newgen_means[1], newgen_stdvs[1]);
	// x[2:] is sampled from a gaussian with parameters crossfaded from the prev and the brandnew
	for (n in 2:N) {
	gen_means[n] <- (changeness[n-1] * newgen_means[n]) + ((1.0 - changeness[n-1]) * gen_means[n-1]);
	gen_stdvs[n] <- (changeness[n-1] * newgen_stdvs[n]) + ((1.0 - changeness[n-1]) * gen_stdvs[n-1]); // to consider: better stdev crossfade
	}

	// now the obsns actually get generated
	for (n in 1:N) {
	x[n] ~ normal(gen_means[n], gen_stdvs[n]);
	}
	}
	#!/usr/bin/env python
	"""
	python makedata.py > data1d_gchangepoint.data.R
	"""

	import numpy as np
	from itertools import chain

	# Simple changepoint model - here with explicitly-chosen changepoints for easy inspection

	n = 100 # num obs
	d = 1
	beta_params = (0.05, 0.5)

	changepoints = [0, 50, 70] # 0 must always be in here please

	# how to sample the generating mean
	parentgaussian_mean_mean = 30
	parentgaussian_mean_stdv = 10
	# how to sample the generating stdev
	parentgaussian_stdv_mean = 6
	parentgaussian_stdv_stdv = 3

	x = [] # obsns

	means = []
	stdvs = []

	# generate observations
	for i in range(n):
	if i in changepoints:
	cur_mean = np.random.normal(parentgaussian_mean_mean, parentgaussian_mean_stdv)
	cur_stdv = abs(np.random.normal(parentgaussian_stdv_mean, parentgaussian_stdv_stdv))
	means.append(cur_mean)
	stdvs.append(cur_stdv)
	x.append(np.random.normal([cur_mean], [cur_stdv]))

	flat = list(chain.from_iterable(zip(*x)))

	# write R data
	print "D <- %i" % d
	print "N <- %i" % n
	print "beta_a <- %g" % beta_params[0]
	print "beta_b <- %g" % beta_params[1]
	print "x <- structure(c(" + ", ".join(map(str, flat)) + "), .Dim=c(%i, %i))" % (len(x), d)
	print "# true_changepoints <- %s" % str(changepoints)
	print "# true_means <- %s" % str(means)
	print "# true_stdvs <- %s" % str(stdvs)