Created
January 5, 2021 05:13
-
-
Save sharanry/6026786104067508ad8b91e2d793322c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# using DrWatson | |
# @quickactivate "ParamRegNN" | |
@time using Revise | |
@time using CUDAdrv; @show CUDAdrv.name(CuDevice(0)) | |
@time using CUDA | |
@time using AdvancedHMC #master | |
@time using Zygote | |
@time using Distributions | |
@time using Functors | |
@time using Flux | |
import Random | |
# Seed both the CPU and the CUDA random number generators with the same value.
Random.seed!(123)
CUDA.seed!(123)
# Log a notice when `has_cuda()` reports that CUDA is available.
has_cuda() && @info "CUDA is on"
"""
    f(x)

Toy labelling function. Takes a length-3 input `x`, builds five sine/cosine
scores from pairwise sums of its entries, applies `softmax`, and returns the
index (1 to 5) of the largest entry.
"""
function f(x)
    @assert length(x) == 3
    a, b, c = x[1], x[2], x[3]
    scores = [sin(a + c), cos(b + a), sin(a + b), cos(b + c), cos(c + a)]
    return argmax(softmax(scores))
end
# Toy data generation: N length-3 inputs, each drawn with `rand(Normal(0, 4), 3)`,
# labelled by `f`.
N = 1000
x = [rand(Normal(0, 4), 3) for _ in 1:N]
y = f.(x);
# `reduce(hcat, x)` builds the 3×N matrix without splatting N arguments into `hcat`.
x = gpu(reduce(hcat, x));
# One-hot encode all labels over the classes 1:5 in one call, convert to Float32,
# then move to the GPU.
y = gpu(Float32.(Flux.onehotbatch(y, 1:5)));
"""
    ProbablisticLayer

Supertype of layers whose parameters are collected by `dropout_params`.
"""
abstract type ProbablisticLayer end

"""
    DenseProbDropout(σ, W, b, p)

Dense layer whose affine output `W * x .+ b` is multiplied elementwise by
`p .+ 1` before the activation `σ` is broadcast over it.
"""
struct DenseProbDropout{A,M,V,D} <: ProbablisticLayer
    σ::A  # activation function
    W::M  # weights
    b::V  # bias
    p::D  # inferred using MCMC (posterior) / SGD (MAP estimate)
end
"""
    DenseProbDropout(in, out, σ = identity; initW = Flux.glorot_uniform, initb = zeros)

Construct a layer mapping `in` inputs to `out` outputs.

`W` is `initW(out, in)` and `b` is `initb(out)`, both passed through `CUDA.cu`;
`p` is drawn with `CUDA.randn(out)`.
"""
function DenseProbDropout(in::Integer, out::Integer, σ = identity;
                          initW = Flux.glorot_uniform, initb = zeros)
    weight = CUDA.cu(initW(out, in))
    bias = CUDA.cu(initb(out))
    noise = CUDA.randn(out)
    return DenseProbDropout(σ, weight, bias, noise)
end
# Functor decomposition: only `σ`, `W` and `b` are exposed as children. When the
# layer is rebuilt, `p` is copied over from the original `a` unchanged.
# NOTE(review): presumably this keeps `p` out of fmap-based traversals — confirm.
function Functors.functor(a::DenseProbDropout)
    children = (σ = a.σ, W = a.W, b = a.b)
    rebuild(nt) = DenseProbDropout(nt.σ, nt.W, nt.b, a.p)
    return children, rebuild
end
"""
    replace_probs(a::DenseProbDropout, p)

Return a new layer sharing `a`'s `σ`, `W` and `b` but with `p` as its `p` field.
Asserts that `p` has the same length as `a.p`.
"""
function replace_probs(a::DenseProbDropout, p)
    @assert length(p) == length(a.p)
    σ, W, b = a.σ, a.W, a.b
    return DenseProbDropout(σ, W, b, p)
end
"""
    replace_probs(c::Chain, probs)

Rebuild `c` with each `DenseProbDropout` layer's `p` replaced: the k-th such
layer (in chain order) receives `probs[k]`. All other layers are reused as-is.
"""
function replace_probs(c::Chain, probs)
    k = 0  # number of DenseProbDropout layers seen so far
    rebuilt = [
        if layer isa DenseProbDropout
            k += 1
            replace_probs(layer, probs[k])
        else
            layer
        end
        for layer in c.layers
    ]
    return Chain(rebuilt...)
end
# Forward pass: affine map, elementwise rescaling by `p .+ 1`, then activation.
function (a::DenseProbDropout)(x)
    affine = a.W * x
    # Kept as a single dotted expression so the broadcast stays fused.
    return a.σ.((affine .+ a.b) .* (a.p .+ 1))
end
# Parameters of a single layer: a one-element vector holding `a.p` itself (no copy).
dropout_params(a::DenseProbDropout) = [a.p]
"""
    dropout_params(model::Chain)

Collect, in chain order, the `dropout_params` of every layer that is a
`ProbablisticLayer`. Returns a `Vector{Any}`.
"""
function dropout_params(model::Chain)
    collected = Any[]
    for layer in Iterators.filter(l -> l isa ProbablisticLayer, model)
        append!(collected, dropout_params(layer))
    end
    return collected
end
"""
    toy_model(; inp = 3, out = 5, hidden = 10)

Build a two-layer chain: a `DenseProbDropout(inp, hidden, relu)` followed by a
`Dense(hidden, out)`.

# Keywords
- `inp`: input dimension of the first layer.
- `out`: output dimension of the last layer.
- `hidden`: width shared by both layers (previously hard-coded to 10 in two places).
"""
function toy_model(; inp = 3, out = 5, hidden = 10)
    return Chain(
        DenseProbDropout(inp, hidden, relu),
        Dense(hidden, out),
    )
end
# Build the model and pass it through `gpu`.
m = gpu(toy_model())
# Smoke test: forward pass on a random 3×10 batch.
m(CUDA.rand(3, 10))
# Gradient of the logit cross-entropy on (x, y) with respect to the dropout
# parameters only.
@time grad = gradient(
    () -> Flux.logitcrossentropy(m(x), y),
    Flux.Params(dropout_params(m)),
)
grad.grads
# Per-layer parameter lengths and their running totals, used to slice a flat
# parameter vector back into per-layer pieces.
lengths = length.(dropout_params(m))
cumsum_lengths = cumsum(lengths)
n_inf_params = sum(lengths)
prior = MvNormal(n_inf_params, 1)
# let us consider a single minibatch of train_data
d = (x, y)
"""
    log_pdf(params)

Log-density used for sampling: the negative logit cross-entropy of the model on
the minibatch `d`, plus `logpdf(prior, params)`.

`params` is a flat vector. It is cut into consecutive slices whose lengths are
given by the global `lengths`, and the slices replace the `p` fields of the
model `m` via `replace_probs`.
"""
function log_pdf(params)
    starts = cumsum_lengths .- lengths .+ 1
    chunks = [params[s:e] for (s, e) in zip(starts, cumsum_lengths)]
    net = replace_probs(m, chunks)
    loglik = -Flux.logitcrossentropy(net(first(d)), last(d))
    return loglik + logpdf(prior, params)
end
# Smoke test with a vector of the right length: `n_inf_params` rather than a
# hard-coded 10, so it stays valid if the model's parameter count changes.
log_pdf(gpu(randn(n_inf_params)))
# Custom Zygote adjoint for the `Iterators.Zip` constructor. The pullback `back`
# has one method per shape of incoming cotangent.
Zygote.@adjoint function Iterators.Zip(xs)
    # Cotangent given as a NamedTuple with a single `is` field: wrap that field.
    function back(dy::NamedTuple{(:is,)})
        return tuple(dy.is)
    end
    # Cotangent given as an array of per-element tuples: take the k-th component
    # of every entry for each zipped iterator, padding with `falses` when the
    # result is shorter than that iterator.
    function back(dy::AbstractArray)
        per_iter = ntuple(length(xs)) do k
            dx = map(t -> t[k], dy)
            if length(dx) == length(xs[k])
                dx
            else
                vcat(dx, falses(length(xs[k]) - length(dx)))
            end
        end
        return tuple(per_iter)
    end
    # An array of `nothing` means there is no gradient to propagate.
    function back(::AbstractArray{Nothing})
        return nothing
    end
    Iterators.Zip(xs), back
end
# Start from the model's current dropout parameters, concatenated into one vector.
initial_θ = vcat(dropout_params(m)...)
metric = UnitEuclideanMetric(Float32, n_inf_params)
hamiltonian = Hamiltonian(metric, log_pdf, Zygote)
# Fixed step size; the disabled alternative was
# `find_good_stepsize(hamiltonian, initial_θ)`.
initial_ϵ = 0.1f0
integrator = Leapfrog(initial_ϵ)
proposal = StaticTrajectory(integrator, 1)
# NOTE(review): contents of this file are not visible here — confirm what it defines.
include("ahmc_gpu.jl")
# Disable scalar indexing on GPU arrays before sampling.
CUDA.allowscalar(false)
samples = sample(hamiltonian, proposal, initial_θ, 100; progress = true)
# Variant with scalar indexing allowed:
# CUDA.allowscalar(true)
# samples = sample(hamiltonian, proposal, initial_θ, 100; progress=true)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment