Skip to content

Instantly share code, notes, and snippets.

@torfjelde
Created October 6, 2020 14:50
Show Gist options
  • Save torfjelde/943b04dec53e7dba86755e1d9981e226 to your computer and use it in GitHub Desktop.
Save torfjelde/943b04dec53e7dba86755e1d9981e226 to your computer and use it in GitHub Desktop.
Simple example of using AdvancedVI.jl + Bijectors.jl to construct an approximate/variational posterior.
julia> using Bijectors
julia> using ForwardDiff
julia> using ComponentArrays, UnPack
julia> using Bijectors: LeakyReLU, Shift, Scale
julia> # Affine transformation is simply a `Scale` composed with `Shift`
using Bijectors: Shift, Scale
julia> function Affine(W::AbstractMatrix, b::AbstractVector; dim::Val{N} = Val(1)) where {N}
# Affine bijector x ↦ W*x + b, built by composing a `Scale` by `W` with a
# `Shift` by `b` (applied right-to-left, so the scale happens first).
# `dim` is forwarded to both component bijectors.
return Shift(b; dim = dim) ∘ Scale(W; dim = dim)
end
Affine (generic function with 1 method)
julia> # Example model whose posterior we will approximate
using Turing
julia> @model function demo(xs)
# Prior on the variance `s`.
s ~ InverseGamma(2, 3)
# Prior on the mean `m`; `√s` is the standard deviation.
m ~ Normal(0, √s)
# Likelihood: each observation is drawn iid from Normal(m, √s).
for i in eachindex(xs)
xs[i] ~ Normal(m, √s)
end
end;
julia> # Generate some data and condition
xs = 0.1 .* randn(1000) .+ 1.;
julia> m = demo(xs);
julia> #########################################################
### Setup for the approximating/variational posterior ###
#########################################################
d = 2;
julia> num_layers = 2;
julia> # Structuring
proto_arrs = [
ComponentArray(A = randn(d, d), b = randn(d), α = randn())
for l = 1:num_layers
];
julia> proto_axes = getaxes.(proto_arrs);
julia> num_params = length.(proto_arrs);
julia> # Maps an array representing all the parameters to the approximate distribution
# Map a flat parameter vector `θ` to the approximate (variational) distribution.
#
# Consumes `num_params[l]` entries of `θ` per layer `l`. Each layer is an
# affine bijector, followed by a `LeakyReLU` nonlinearity for all but the
# final layer. The layers are composed with `Bijectors.composel` and pushed
# through `to_constrained` so samples land in the model's support, then
# applied to the `base` distribution via `transformed`.
#
# Relies on the globals `proto_axes`, `num_params`, `num_layers`, `base`,
# and `to_constrained` defined elsewhere in this session.
function getq(θ)
    idx = 1
    layers = map(enumerate(proto_axes)) do (l, layer_axes)
        k = num_params[l]
        # Re-attach the axes so the layer's parameters can be unpacked by name.
        @unpack A, b, α = ComponentArray(θ[idx:idx + k - 1], layer_axes)
        idx += k
        aff = Affine(A, b)
        # `abs(α) + 0.01` keeps the negative-slope parameter strictly positive.
        nonlinearity = LeakyReLU(abs(α) + 0.01; dim = Val(1))
        # The last layer is purely affine; earlier layers get the nonlinearity.
        l == num_layers ? aff : nonlinearity ∘ aff
    end
    b = to_constrained ∘ Bijectors.composel(layers...)
    return transformed(base, b)
end
getq (generic function with 1 method)
julia> # Construct the approximate/variational posterior
base = MvNormal(zeros(2), ones(2));
julia> to_unconstrained = bijector(m);
julia> to_constrained = inv(to_unconstrained);
julia> ######################################################
### Fitting the approximate/variational posterior ####
######################################################
# Configuration for the VI algorithm
advi = ADVI(10, 1_000)
ADVI{AdvancedVI.ForwardDiffAD{40}}(10, 1000)
julia> # Initial parameters
θ = randn(sum(num_params));
julia> # Turing.jl provides the impl of `vi` for `Model` `m`
# q = vi(m, advi, getq, θ ./ sum(num_params))
# Or we provide the map `θ ↦ logπ(data, θ)` directly (needed to compute the ELBO).
logπ = Turing.Variational.make_logjoint(m);
julia> q_test = getq(θ);
julia> logπ(rand(q_test)) # check that it works
-865.0655685335663
julia> q = vi(logπ, advi, getq, θ ./ sum(num_params));
┌ Info: [ADVI] Should only be seen once: optimizer created for θ
└ objectid(θ) = 0x77b5e7a2a094bb51
[ADVI] Optimizing...100% Time: 0:00:31
julia> # Check the result
q_samples = rand(q, 1000);
julia> mean(q_samples; dims = 2)
2×1 Array{Float64,2}:
0.0172600897154137
0.979920660898407
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment