Created
February 4, 2019 16:03
-
-
Save alstat/6a79db2d769c1e7f2b3d2ef337ec4be3 to your computer and use it in GitHub Desktop.
Turing.jl Tutorial on Bayesian Linear Regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Turing, Distributions | |
# Import RDatasets. | |
using RDatasets | |
# Import MCMCChain, Plots, and StatPlots for visualizations and diagnostics. | |
using MCMCChain, Plots, StatPlots | |
# MLDataUtils provides a sample splitting tool that's very handy. | |
using MLDataUtils | |
# Set a seed for reproducibility. | |
using Random | |
Random.seed!(0); | |
# Hide the progress prompt while sampling. | |
Turing.turnprogress(false); | |
# Import the "Default" dataset. | |
data = RDatasets.dataset("datasets", "mtcars"); | |
# Show the first six rows of the dataset. | |
head(data, 6) | |
# Split our dataset 70%/30% into training/test sets. | |
train, test = MLDataUtils.splitobs(data, at = 0.7); | |
# Save dataframe versions of our dataset. | |
train_cut = DataFrame(train) | |
test_cut = DataFrame(test) | |
# Create our labels. These are the values we are trying to predict. | |
train_label = train[:, :MPG] | |
test_label = test[:, :MPG] | |
# Get the list of columns to keep. | |
remove_names = filter(x->!in(x, [:MPG, :Model]), names(data)) | |
# Filter the test and train sets. | |
train = Matrix(train[:,remove_names]); | |
test = Matrix(test[:,remove_names]); | |
# A handy helper function to rescale our dataset. | |
function standardize(x) | |
return (x .- mean(x, dims=1)) ./ std(x, dims=1), x | |
end | |
# Another helper function to unstandardize our datasets. | |
function unstandardize(x, orig) | |
return x .* std(orig, dims=1) .+ mean(orig, dims=1) | |
end | |
# Standardize our dataset. | |
(train, train_orig) = standardize(train) | |
(test, test_orig) = standardize(test) | |
(train_label, train_l_orig) = standardize(train_label) | |
(test_label, test_l_orig) = standardize(test_label); | |
# Bayesian linear regression. | |
@model linear_regression(x, y, n_obs, n_vars) = begin | |
# Set variance prior. | |
σ₂ ~ TruncatedNormal(0,100, 0, Inf) | |
# Set intercept prior. | |
intercept ~ Normal(0, 3) | |
# Set the priors on our coefficients. | |
coefficients = Array{Real}(undef, n_vars) | |
coefficients ~ [Normal(0, 10)] | |
# Calculate all the mu terms. | |
mu = intercept .+ x * coefficients | |
for i = 1:n_obs | |
y[i] ~ Normal(mu[i], σ₂) | |
end | |
end; | |
n_obs, n_vars = size(train) | |
model = linear_regression(train, train_label, n_obs, n_vars) | |
chain = sample(model, NUTS(1500, 200, 0.65)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment