Simple example of using AdvancedVI.jl + Bijectors.jl to construct an approximate/variational posterior.
julia> using Bijectors
julia> using ForwardDiff
julia> using ComponentArrays, UnPack
julia> using Bijectors: LeakyReLU, Shift, Scale
julia> # Affine transformation is simply a `Scale` composed with `Shift`
       using Bijectors: Shift, Scale
julia> function Affine(W::AbstractMatrix, b::AbstractVector; dim::Val{N} = Val(1)) where {N}
           return Shift(b; dim = dim) ∘ Scale(W; dim = dim)
       end
Affine (generic function with 1 method)
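julia> # Quick sanity check (a sketch, assuming Bijectors' `∘` follows Base's
       # composition order, i.e. the `Scale` is applied before the `Shift`):
       # `Affine(W, b)` should act as `x ↦ W * x + b`.
       let W = randn(2, 2), b = randn(2), x = randn(2)
           @assert Affine(W, b)(x) ≈ W * x + b
       end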
julia> # Example model whose posterior we will approximate
       using Turing
julia> @model function demo(xs)
           s ~ InverseGamma(2, 3)
           m ~ Normal(0, √s)
           for i in eachindex(xs)
               xs[i] ~ Normal(m, √s)
           end
       end;
julia> # Generate some data and condition
       xs = 0.1 .* randn(1000) .+ 1.;
julia> m = demo(xs);
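julia> # Note: the data are (approximately) draws from `Normal(1, 0.1)`, so the
       # data-generating values are m ≈ 1 and s = 0.1^2 = 0.01; we compare the
       # fitted posterior against these at the end.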
julia> #########################################################
       ### Setup for the approximating/variational posterior ###
       #########################################################
       d = 2;
julia> num_layers = 2;
julia> # One prototype `ComponentArray` per layer, fixing the layout of its parameters
       proto_arrs = [
           ComponentArray(A = randn(d, d), b = randn(d), α = randn())
           for l = 1:num_layers
       ];
julia> proto_axes = getaxes.(proto_arrs);
julia> num_params = length.(proto_arrs);
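julia> # Each layer has a `d × d` matrix `A`, a length-`d` vector `b` and a scalar `α`,
       # so with `d = 2` this gives `num_params == [7, 7]` and 14 parameters in total.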
julia> # Maps a flat vector `θ` containing all parameters to the approximate distribution
       function getq(θ)
           idx = 1
           layers = map(enumerate(proto_axes)) do (l, axes)
               k = num_params[l]
               # Recover the named parameters `A`, `b`, `α` of layer `l` from `θ`
               @unpack A, b, α = ComponentArray(θ[idx:idx + k - 1], axes)
               aff = Affine(A, b)
               nonlin = LeakyReLU(abs(α) + 0.01; dim = Val(1))
               idx += k
               if l == num_layers
                   # last layer is purely affine
                   layer = aff
               else
                   # earlier layers: affine followed by a leaky ReLU
                   layer = nonlin ∘ aff
               end
               layer
           end
           b = to_constrained ∘ Bijectors.composel(layers...)
           return transformed(base, b)
       end
getq (generic function with 1 method)
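julia> # Note: `getq` closes over the globals `num_layers`, `base` and `to_constrained`;
       # the latter two are only defined below, so `getq` must not be called before then.
       # The resulting distribution pushes samples from `base` through the layers above
       # and finally through `to_constrained` into the model's support.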
julia> # Construct the approximate/variational posterior
       base = MvNormal(zeros(2), ones(2));
julia> to_unconstrained = bijector(m);
julia> to_constrained = inv(to_unconstrained);
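julia> # Quick sanity check (a sketch, assuming the variables are stacked in the order
       # they appear in the model, i.e. `s` first): `to_constrained` should map ℝ² into
       # the model's support, so the `s` component must come out positive.
       @assert to_constrained(randn(2))[1] > 0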
julia> ######################################################
       ### Fitting the approximate/variational posterior ####
       ######################################################
       # Configuration for the VI algorithm: 10 Monte Carlo samples per ELBO-gradient
       # estimate, and (at most) 1_000 optimization steps
       advi = ADVI(10, 1_000)
ADVI{AdvancedVI.ForwardDiffAD{40}}(10, 1000)
julia> # Initial parameters
       θ = randn(sum(num_params));
julia> # Turing.jl provides an implementation of `vi` for a `Model`, so we could call
       # q = vi(m, advi, getq, θ ./ sum(num_params))
       # Alternatively, we provide the log-joint `θ ↦ log p(data, θ)` directly
       # (this is what is needed to compute the ELBO).
       logπ = Turing.Variational.make_logjoint(m);
julia> q_test = getq(θ);
julia> logπ(rand(q_test)) # check that it works
-865.0655685335663
julia> q = vi(logπ, advi, getq, θ ./ sum(num_params));
┌ Info: [ADVI] Should only be seen once: optimizer created for θ
└ objectid(θ) = 0x77b5e7a2a094bb51
[ADVI] Optimizing...100% Time: 0:00:31
julia> # Check the result
       q_samples = rand(q, 1000);
julia> mean(q_samples; dims = 2)
2×1 Array{Float64,2}:
 0.0172600897154137
 0.979920660898407
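julia> # The result is consistent with the data above: `m ≈ 0.98` is close to the
       # data-generating value 1, and `s ≈ 0.017` is in the right ballpark of
       # 0.1^2 = 0.01 (given `m`, the conjugate posterior for `s` is roughly
       # InverseGamma(2 + 1000/2, 3 + 1000 * 0.01 / 2), with mean 8 / 501 ≈ 0.016).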