Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Created October 11, 2022 17:56
Show Gist options
  • Save vankesteren/c0da7449ea1fc8aab2390cd98bf5f3d5 to your computer and use it in GitHub Desktop.
Save vankesteren/c0da7449ea1fc8aab2390cd98bf5f3d5 to your computer and use it in GitHub Desktop.
Bayesian Bootstrap in Julia
using Downloads
using CSV
using StatsKit
using Gadfly
# Apply macro from StatsBase to create new weights type
BayesianBootstrapWeights = StatsBase.@weights BayesianBoostrapWeights
function StatsBase.varcorrection(w::BayesianBootstrapWeights, corrected::Bool=false)
s = w.sum
if corrected
n = count(!iszero, w)
n / (s * (n - 1))
else
1 / s
end
end
function bbweights(N::Integer)
dist = Dirichlet(ones(N))
return BayesianBootstrapWeights(rand(dist))
end
# use the bayesian bootstrap weights for bootstrapping a regression
# load data
dat_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
dat_path = Downloads.download(dat_url)
dat_file = CSV.File(dat_path; header = 0)
dat_names = [
"alcohol", "malic_acid", "ash", "ash_alkalinity",
"magnesium", "total_phenols", "flavanoids", "nonflavanoid_phenols",
"proanthocyanins", "color_intensity", "hue", "od280_od315", "proline"
]
dat = DataFrame(dat_file)[:,2:end]
DataFrames.rename!(dat, dat_names)
frm = term(:alcohol) ~ sum(term.(dat_names[2:end]))
res = lm(frm, dat)
θ = zeros(13, 10000)
for i in 1:10000
w = bbweights(nrow(dat))
θ[:,i] = coef(lm(frm, dat; wts = w))
end
res_df = DataFrame(
:boot => [std(v) for v in eachrow(θ)],
:lm => coeftable(res).cols[2]
)
# Plot bootstrap distributions
bb_df = DataFrame(θ', vcat("Intercept", dat_names[2:end]))
p = plot(
stack(bb_df[:,2:5]),
xgroup = :variable,
x = :value, color = :variable,
Geom.subplot_grid(Geom.histogram, free_x_axis = true),
Theme(background_color = "white", key_position = :none),
Guide.title("Bayesian Bootstrap posteriors of four parameters.")
)
p |> SVG("boot.svg", 10inch, 5inch)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment