Global Temperature data modelled by a Gaussian Process
#load packages
library("curl")
library("ggplot2")
library("rstan")
#retrieve the data
tmpf <- tempfile()
curl_download("http://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/time_series/HadCRUT.4.5.0.0.annual_ns_avg.txt", tmpf)
gtemp <- read.table(tmpf, colClasses = rep("numeric", 12))[, 1:2] # only want some of the variables
names(gtemp) <- c("Year", "Temperature")
# drops the last row (incomplete 2016)
gtemp <- head(gtemp, -1)
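#quick sanity check: should show two numeric columns (Year & Temperature),
# now ending at the last complete year (2015)
str(gtemp)
tail(gtemp)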
#visualize
p = ggplot() +
	geom_point(
		data = gtemp
		, aes(
			x = Year
			, y = Temperature
		)
	)
print(p)
#scale to mean 0 & SD 1 so the unit-scale default priors in the Stan models apply
# (scale() returns a 1-column matrix; [,1] extracts a plain vector)
x = scale(gtemp$Year)[,1]
y = scale(gtemp$Temperature)[,1]
#compile the model
gp_model = stan_model(file="gp_fit.stan")
#update/sample the model
post_update <- sampling(
	gp_model
	, data = list(
		x = x
		, N = length(x)
		, y = y
		, N2 = length(x)
		, x2 = x
	)
	, iter = 1e4 #probably overkill
	, chains = 1
	, cores = 1
	, seed = 1
	, pars = c('rho','eta','sigma','mu2','y2')
)
alarm() #beep to signal that sampling is done
#check for sufficient ESS & Rhat
print(post_update)
# yup, all >1000
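#the same check can be done programmatically: summary() on a stanfit returns a
# matrix with n_eff & Rhat columns, e.g.:
fit_summary = summary(post_update)$summary
range(fit_summary[,'n_eff']) #all >1000 here
range(fit_summary[,'Rhat']) #all near 1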
#get y predictions
y2 = extract(
	post_update
	, pars = 'y2'
)[[1]]
#revert to original scale
y2 = y2*sd(gtemp$Temperature) + mean(gtemp$Temperature)
#compute 95% credible intervals (2.5% & 97.5% posterior quantiles)
y2_loci = apply(y2,2,quantile,probs=.025)
y2_hici = apply(y2,2,quantile,probs=.975)
#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = gtemp$Year
		, ymin = y2_loci
		, ymax = y2_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
)
print(p)
#get mu2 samples
mu2 = extract(
	post_update
	, pars = 'mu2'
)[[1]]
#revert to original scale
mu2 = mu2*sd(gtemp$Temperature) + mean(gtemp$Temperature)
#compute 95% credible intervals
mu2_loci = apply(mu2,2,quantile,probs=.025)
mu2_hici = apply(mu2,2,quantile,probs=.975)
#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = gtemp$Year
		, ymin = mu2_loci
		, ymax = mu2_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .8
	, fill = 'green'
)
print(p)
#compute first derivative of y samples
y2Deriv = matrix(NA,nrow=nrow(y2),ncol=ncol(y2)-1)
for(i in 1:nrow(y2)){
	y2Deriv[i,] = diff(y2[i,])
}
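#equivalent vectorized alternative to the loop above:
# y2Deriv = t(apply(y2, 1, diff))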
#compute 95% credible interval for first derivative of y
y2Deriv_loci = apply(y2Deriv,2,quantile,probs=.025)
y2Deriv_hici = apply(y2Deriv,2,quantile,probs=.975)
#plot
ggplot() + geom_ribbon(
	data = data.frame(
		x = gtemp$Year[2:nrow(gtemp)]
		, ymin = y2Deriv_loci
		, ymax = y2Deriv_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
)+
geom_hline(
	yintercept = 0
	, linetype = 3
)+
labs(
	x = 'Year'
	, y = 'Year-to-Year\nTemperature change'
)
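#optional extra summary: the posterior probability of a positive
# year-to-year change at each year
pPositive = colMeans(y2Deriv > 0)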
#compute first derivative of mu
mu2Deriv = matrix(NA,nrow=nrow(mu2),ncol=ncol(mu2)-1)
for(i in 1:nrow(mu2)){
	mu2Deriv[i,] = diff(mu2[i,])
}
#compute 95% credible interval of first derivative of mu
mu2Deriv_loci = apply(mu2Deriv,2,quantile,probs=.025)
mu2Deriv_hici = apply(mu2Deriv,2,quantile,probs=.975)
#plot
ggplot() + geom_ribbon(
	data = data.frame(
		x = gtemp$Year[2:nrow(gtemp)]
		, ymin = mu2Deriv_loci
		, ymax = mu2Deriv_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
)+
geom_hline(
	yintercept = 0
	, linetype = 3
)+
labs(
	x = 'Year'
	, y = 'Year-to-Year\nTemperature change'
)
#Try a model with noisy sampling from a latent noiseless GP
gp_model2 = stan_model(file="gp_fit_2.stan")
#update/sample the model
post_update2 <- sampling(
	gp_model2
	, data = list(
		x = x
		, N = length(x)
		, y = y
		, fudge = 1e-3
	)
	, iter = 2e3 #probably overkill
	, chains = 4
	, cores = 4
	, seed = 1
)
alarm() #beep to signal that sampling is done
print(post_update2)
#get posterior samples on latent function f
f = extract(
	post_update2
	, pars = 'f'
)[[1]]
#revert to original scale
f = f*sd(gtemp$Temperature) + mean(gtemp$Temperature)
#compute 95% credible intervals
f_loci = apply(f,2,quantile,probs=.025)
f_hici = apply(f,2,quantile,probs=.975)
#add to plot
p = p + geom_ribbon(
	data = data.frame(
		x = gtemp$Year
		, ymin = f_loci
		, ymax = f_hici
	)
	, mapping = aes(
		x = x
		, ymin = ymin
		, ymax = ymax
	)
	, alpha = .5
)
print(p)
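The two Stan files follow. The first, gp_fit.stan, folds the observation noise into the diagonal of the GP covariance and computes predictions analytically in generated quantities: with Sigma the covariance matrix among the observed x, Omega the covariance among the prediction locations x2, and K their cross-covariance, the predictive mean and covariance are mu2 = K' * Sigma^{-1} * y and Tau = Omega - K' * Sigma^{-1} * K, and y2 is a single posterior-predictive draw from multi_normal(mu2, Tau).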
// gp_fit.stan: GP regression with predictions computed analytically
// in generated quantities
data {
	int<lower=1> N ; //number of observations
	vector[N] x ; //x-axis coordinate of observations
	vector[N] y ; //observations
	int<lower=1> N2 ; //number of prediction locations
	vector[N2] x2 ; //prediction locations
}
transformed data {
	vector[N] mu ; //zero mean vector for the GP
	for (i in 1:N) {
		mu[i] = 0 ;
	}
}
parameters {
	real<lower=0> eta ; //usually called "eta_sq"
	real<lower=0> rho ; //usually called "inv_rho_sq"
	real<lower=0> sigma ; //usually called "sigma_sq"
}
transformed parameters{
	matrix[N,N] L ;
	{
		matrix[N,N] covMat ;
		// off-diagonal elements
		for (i in 1:(N-1)) {
			for (j in (i+1):N) {
				covMat[i,j] = eta * exp(-pow(x[i] - x[j], 2)/rho) ;
				covMat[j,i] = covMat[i,j] ;
			}
		}
		// diagonal elements (noise folded in)
		for (k in 1:N){
			covMat[k,k] = eta + sigma ;
		}
		L = cholesky_decompose(covMat) ;
	}
}
model {
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	sigma ~ weibull(2,1) ;
	y ~ multi_normal_cholesky(mu,L) ;
}
generated quantities {
	vector[N2] mu2 ;
	vector[N2] y2 ;
	matrix[N2,N2] L2 ;
	{
		matrix[N,N] Sigma ;
		matrix[N2,N2] Omega ;
		matrix[N,N2] K ;
		matrix[N2,N] K_transpose_div_Sigma ;
		matrix[N2,N2] Tau ;
		// K: cross-covariance between observed & prediction locations
		for (i in 1:N){
			for (j in 1:N2){
				K[i,j] = eta * exp(-pow(x[i] - x2[j], 2)/rho) ;
			}
		}
		// Sigma off-diagonal elements
		for (i in 1:(N-1)){
			for (j in (i+1):N){
				Sigma[i,j] = eta * exp(-pow(x[i] - x[j], 2)/rho) ;
				Sigma[j,i] = Sigma[i,j] ;
			}
		}
		// Omega off-diagonal elements
		for (i in 1:(N2-1)){
			for (j in (i+1):N2){
				Omega[i,j] = eta * exp(-pow(x2[i] - x2[j], 2)/rho) ;
				Omega[j,i] = Omega[i,j] ;
			}
		}
		// Sigma diagonal elements
		for (k in 1:N){
			Sigma[k,k] = eta + sigma ;
		}
		// Omega diagonal elements
		for (k in 1:N2){
			Omega[k,k] = eta + sigma ;
		}
		K_transpose_div_Sigma = K' / Sigma ;
		mu2 = K_transpose_div_Sigma * y ;
		Tau = Omega - K_transpose_div_Sigma * K ;
		// enforce symmetry (guards against numerical asymmetry)
		for (i in 1:(N2-1)){
			for (j in (i+1):N2){
				Tau[i,j] = Tau[j,i] ;
			}
		}
		L2 = cholesky_decompose(Tau) ;
	}
	y2 = multi_normal_cholesky_rng(mu2, L2) ;
}
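The second model, gp_fit_2.stan, instead represents the latent noiseless function explicitly, using the standard non-centered parameterization: with L the Cholesky factor of the kernel matrix covMat, sampling z ~ normal(0,1) and setting f = L * z implies f ~ multi_normal(0, covMat); the data are then modelled as y ~ normal(f, noise). Note that rho enters this kernel as a multiplier, eta * exp(-rho * (x[i] - x[j])^2), i.e. the reciprocal of its role in gp_fit.stan.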
// gp_fit_2.stan: noisy observations of a latent noiseless GP
data {
	// N: number of unique x values
	int<lower=1> N ;
	// x: x values
	vector[N] x ;
	// y: data
	vector[N] y ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: negative squared pairwise distances between x values
	matrix[N,N] dx ;
	for (i in 1:(N-1)) {
		dx[i,i] = 0 ; //diagonal
		for (j in (i+1):N) { //off-diagonal
			dx[i,j] = -( x[i] - x[j] )^2 ;
			dx[j,i] = dx[i,j] ; //lower mirrors upper
		}
	}
	dx[N,N] = 0 ; //final diagonal
}
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: standard-normal dummy variable underlying the non-centered
	// parameterization of the GP (speeds up computation)
	vector[N] z ;
}
transformed parameters{
	// f: latent function values
	vector[N] f ;
	{
		// L: cholesky decomposition of covariance matrix
		matrix[N,N] L ;
		// covMat: covariance matrix
		matrix[N,N] covMat ;
		// covMat off-diagonal (rho enters as a multiplier here)
		covMat = exp(dx*rho) * eta ;
		// covMat diagonal: adding jitter to avoid computation issues
		for (k in 1:N){
			covMat[k,k] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		f = L * z ;
	}
}
model {
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	// assert sampling of z as standard normal
	z ~ normal(0,1) ;
	// assert sampling of each replicate of y as the GP plus noise
	y ~ normal(f,noise) ;
}