alstat · February 4, 2019 16:03
diff --git a/code-turing.jl b/code-turing.jl
 using Turing, Distributions

 # Import RDatasets.
 using RDatasets

 # Import MCMCChain, Plots, and StatPlots for visualizations and diagnostics.
 using MCMCChain, Plots, StatPlots

 # MLDataUtils provides a sample splitting tool that's very handy.
 using MLDataUtils

 # Set a seed for reproducibility.
 using Random
 Random.seed!(0);

 # Hide the progress prompt while sampling.
 Turing.turnprogress(false);

 # Import the "Default" dataset.
 data = RDatasets.dataset("datasets", "mtcars");

 # Show the first six rows of the dataset.
 head(data, 6)

 # Split our dataset 70%/30% into training/test sets.
 train, test = MLDataUtils.splitobs(data, at = 0.7);

 # Save dataframe versions of our dataset.
 train_cut = DataFrame(train)
 test_cut = DataFrame(test)

 # Create our labels. These are the values we are trying to predict.
 train_label = train[:, :MPG]
 test_label = test[:, :MPG]

 # Get the list of columns to keep.
 remove_names = filter(x->!in(x, [:MPG, :Model]), names(data))

 # Filter the test and train sets.
 train = Matrix(train[:,remove_names]);
 test = Matrix(test[:,remove_names]);

 # A handy helper function to rescale our dataset.
 function standardize(x)
    return (x .- mean(x, dims=1)) ./ std(x, dims=1), x
 end

 # Another helper function to unstandardize our datasets.
 function unstandardize(x, orig)
    return x .* std(orig, dims=1) .+ mean(orig, dims=1)
 end

 # Standardize our dataset.
 (train, train_orig) = standardize(train)
 (test, test_orig) = standardize(test)
 (train_label, train_l_orig) = standardize(train_label)
 (test_label, test_l_orig) = standardize(test_label);

 # Bayesian linear regression.
 @model linear_regression(x, y, n_obs, n_vars) = begin
    # Set variance prior.
    σ₂ ~ TruncatedNormal(0,100, 0, Inf)
    
    # Set intercept prior.
    intercept ~ Normal(0, 3)
    
    # Set the priors on our coefficients.
    coefficients = Array{Real}(undef, n_vars)
    coefficients ~ [Normal(0, 10)]
    
    # Calculate all the mu terms.
    mu = intercept .+ x * coefficients
    for i = 1:n_obs
        y[i] ~ Normal(mu[i], σ₂)
    end
 end;

 n_obs, n_vars = size(train)
 model = linear_regression(train, train_label, n_obs, n_vars)
 chain = sample(model, NUTS(1500, 200, 0.65));
	using Turing, Distributions

	# Import RDatasets.
	using RDatasets

	# Import MCMCChain, Plots, and StatPlots for visualizations and diagnostics.
	using MCMCChain, Plots, StatPlots

	# MLDataUtils provides a sample splitting tool that's very handy.
	using MLDataUtils

	# Set a seed for reproducibility.
	using Random
	Random.seed!(0);

	# Hide the progress prompt while sampling.
	Turing.turnprogress(false);

	# Import the "Default" dataset.
	data = RDatasets.dataset("datasets", "mtcars");

	# Show the first six rows of the dataset.
	head(data, 6)

	# Split our dataset 70%/30% into training/test sets.
	train, test = MLDataUtils.splitobs(data, at = 0.7);

	# Save dataframe versions of our dataset.
	train_cut = DataFrame(train)
	test_cut = DataFrame(test)

	# Create our labels. These are the values we are trying to predict.
	train_label = train[:, :MPG]
	test_label = test[:, :MPG]

	# Get the list of columns to keep.
	remove_names = filter(x->!in(x, [:MPG, :Model]), names(data))

	# Filter the test and train sets.
	train = Matrix(train[:,remove_names]);
	test = Matrix(test[:,remove_names]);

	# A handy helper function to rescale our dataset.
	function standardize(x)
	return (x .- mean(x, dims=1)) ./ std(x, dims=1), x
	end

	# Another helper function to unstandardize our datasets.
	function unstandardize(x, orig)
	return x .* std(orig, dims=1) .+ mean(orig, dims=1)
	end

	# Standardize our dataset.
	(train, train_orig) = standardize(train)
	(test, test_orig) = standardize(test)
	(train_label, train_l_orig) = standardize(train_label)
	(test_label, test_l_orig) = standardize(test_label);

	# Bayesian linear regression.
	@model linear_regression(x, y, n_obs, n_vars) = begin
	# Set variance prior.
	σ₂ ~ TruncatedNormal(0,100, 0, Inf)

	# Set intercept prior.
	intercept ~ Normal(0, 3)

	# Set the priors on our coefficients.
	coefficients = Array{Real}(undef, n_vars)
	coefficients ~ [Normal(0, 10)]

	# Calculate all the mu terms.
	mu = intercept .+ x * coefficients
	for i = 1:n_obs
	y[i] ~ Normal(mu[i], σ₂)
	end
	end;

	n_obs, n_vars = size(train)
	model = linear_regression(train, train_label, n_obs, n_vars)
	chain = sample(model, NUTS(1500, 200, 0.65));