WardBrian · March 18, 2025 18:43
diff --git a/analysis.py b/analysis.py
 import matplotlib.pyplot as plt
 import numpy as np
 import json

 # load data from data.json
 with open("data.json") as f:
    data = json.load(f)
 x = np.array(data['x'])
 y = np.array(data['y'])

 # posterior predictive check -- manual way
 alpha = draws.get('alpha').flatten()
 beta = draws.get('beta').flatten()

 # plot 250 random samples to get an idea of envelope
 N = 250
 choices = np.random.choice(len(alpha), N)
 for i in choices:
  plt.plot(x, alpha[i] + beta[i] * x, color='gray', alpha=0.05)

 # plot mean line
 plt.plot(x, alpha.mean() + beta.mean() * x, label='Mean estimate', color='black')

 # plot data
 plt.scatter(x, y, label='data')

 plt.legend()
 plt.show()

 # less manual way
 import arviz as az
 import xarray as xr
 idata = draws.to_arviz()

 idata.add_groups(observed_data=xr.DataArray(name="y", data=y, coords={"x":x}))

 idata.posterior['y_model'] = idata.posterior['alpha'] + idata.posterior['beta'] * idata.observed_data.x

 az.plot_lm(idata=idata, y='y', y_model="y_model",
  # also try kind_model = 'hdi'
  kind_model="lines", 
  y_model_mean_kwargs={"color":"black"}, y_model_plot_kwargs={"color": "gray"})
diff --git a/analysis.R b/analysis.R
 install.packages("ggplot2")
 library(posterior)
 library(ggplot2)

 # load data from data.json
 data <- jsonlite::read_json("./data.json")
 x <- unlist(data$x)
 y <- unlist(data$y)

 # posterior predictive check 
 alpha <- extract_variable(draws, "alpha")
 beta <- extract_variable(draws, "beta")

 # 250 random draws
 N <- 250
 idxs <- sample(length(alpha), 250)

 ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +
  geom_abline(data = data.frame(alpha=alpha[idxs], beta=beta[idxs]), 
              aes(intercept = alpha, slope = beta),
              color = "gray", alpha = 0.1) +
  # posterior mean
  geom_abline(intercept = mean(alpha), 
              slope = mean(beta),
              color = "black", size = 1) +
  # plot data
  geom_point(color = "steelblue", size = 2) +
  theme_bw()
diff --git a/data.json b/data.json
 {
  "N": 10,
  "x": [
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10
  ],
  "y": [
    3.5164361480858806,
    1.9422090649104387,
    3.856878064212197,
    5.8081366125433025,
    7.727339580090061,
    7.974795841003987,
    9.775364868869321,
    10.737390454859584,
    12.557258326283772,
    15.382774755732974
  ]
 }
diff --git a/data.py b/data.py
 # simulate regression data from known parameters
 import numpy as np

 # "true" values of the parameters
 beta = 1.25
 alpha = -0.1
 sigma = 1.6

 N = 10

 x = np.arange(1, N+1)

 y = np.random.normal(alpha + beta * x, sigma)

 data = {"N":N, "x":x, "y":y}
diff --git a/data.R b/data.R
 # simulate regression data from known parameters

 # "true" values of the parameters
 beta <- 1.25
 alpha <- -0.1
 sigma <- 1.6

 N <- 10

 x <- 1:N

 y <- rnorm(N, alpha + beta * x, sigma)

 data <- list(N=N, x=x, y=y)
diff --git a/main.stan b/main.stan
 data {
  int<lower=0> N;
  vector[N] x;
  vector[N] y;
 }
 parameters {
  real alpha;
  real beta;
  real<lower=0> sigma;
 }
 model {
  y ~ normal(alpha + beta * x, sigma);
 }
	import matplotlib.pyplot as plt
	import numpy as np
	import json

	# load data from data.json
	with open("data.json") as f:
	data = json.load(f)
	x = np.array(data['x'])
	y = np.array(data['y'])

	# posterior predictive check -- manual way
	alpha = draws.get('alpha').flatten()
	beta = draws.get('beta').flatten()

	# plot 250 random samples to get an idea of envelope
	N = 250
	choices = np.random.choice(len(alpha), N)
	for i in choices:
	plt.plot(x, alpha[i] + beta[i] * x, color='gray', alpha=0.05)

	# plot mean line
	plt.plot(x, alpha.mean() + beta.mean() * x, label='Mean estimate', color='black')

	# plot data
	plt.scatter(x, y, label='data')

	plt.legend()
	plt.show()

	# less manual way
	import arviz as az
	import xarray as xr
	idata = draws.to_arviz()

	idata.add_groups(observed_data=xr.DataArray(name="y", data=y, coords={"x":x}))

	idata.posterior['y_model'] = idata.posterior['alpha'] + idata.posterior['beta'] * idata.observed_data.x

	az.plot_lm(idata=idata, y='y', y_model="y_model",
	# also try kind_model = 'hdi'
	kind_model="lines",
	y_model_mean_kwargs={"color":"black"}, y_model_plot_kwargs={"color": "gray"})
	install.packages("ggplot2")
	library(posterior)
	library(ggplot2)

	# load data from data.json
	data <- jsonlite::read_json("./data.json")
	x <- unlist(data$x)
	y <- unlist(data$y)

	# posterior predictive check
	alpha <- extract_variable(draws, "alpha")
	beta <- extract_variable(draws, "beta")

	# 250 random draws
	N <- 250
	idxs <- sample(length(alpha), 250)

	ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +
	geom_abline(data = data.frame(alpha=alpha[idxs], beta=beta[idxs]),
	aes(intercept = alpha, slope = beta),
	color = "gray", alpha = 0.1) +
	# posterior mean
	geom_abline(intercept = mean(alpha),
	slope = mean(beta),
	color = "black", size = 1) +
	# plot data
	geom_point(color = "steelblue", size = 2) +
	theme_bw()
	{
	"N": 10,
	"x": [
	1,
	2,
	3,
	4,
	5,
	6,
	7,
	8,
	9,
	10
	],
	"y": [
	3.5164361480858806,
	1.9422090649104387,
	3.856878064212197,
	5.8081366125433025,
	7.727339580090061,
	7.974795841003987,
	9.775364868869321,
	10.737390454859584,
	12.557258326283772,
	15.382774755732974
	]
	}
	# simulate regression data from known parameters
	import numpy as np

	# "true" values of the parameters
	beta = 1.25
	alpha = -0.1
	sigma = 1.6

	N = 10

	x = np.arange(1, N+1)

	y = np.random.normal(alpha + beta * x, sigma)

	data = {"N":N, "x":x, "y":y}
	# simulate regression data from known parameters

	# "true" values of the parameters
	beta <- 1.25
	alpha <- -0.1
	sigma <- 1.6

	N <- 10

	x <- 1:N

	y <- rnorm(N, alpha + beta * x, sigma)

	data <- list(N=N, x=x, y=y)
	data {
	int<lower=0> N;
	vector[N] x;
	vector[N] y;
	}
	parameters {
	real alpha;
	real beta;
	real<lower=0> sigma;
	}
	model {
	y ~ normal(alpha + beta * x, sigma);
	}