@insujeon
Forked from vvanirudh/bayes_by_backprop.py
Created October 27, 2017 18:28
Bayes by Backprop in PyTorch (introduced in the paper "Weight Uncertainty in Neural Networks", Blundell et al. 2015)
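For reference, the `criterion` defined below implements (per minibatch, before averaging over the `n_samples` weight draws and dividing by `batch_size`) a uniformly weighted version of the paper's minibatch variational free energy, roughly

\[
\mathcal{F}_i \;\approx\; \frac{1}{\text{n\_batches}} \bigl[\log q(\mathbf{w}\mid\theta) - \log P(\mathbf{w})\bigr] \;-\; \log P(\mathcal{D}_i \mid \mathbf{w}),
\]

where \(\mathbf{w}\) is a single weight sample from the variational posterior \(q(\mathbf{w}\mid\theta)\); the paper's \(\pi_i = 2^{M-i}/(2^M-1)\) weighting is replaced here by the simple \(1/\text{n\_batches}\) factor.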
# Drawn from https://gist.github.com/rocknrollnerd/c5af642cf217971d93f499e8f70fcb72 (in Theano)
# This is implemented in PyTorch
# Author : Anirudh Vemula
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
# fetch_mldata and sklearn.cross_validation were removed from scikit-learn;
# fetch_openml and model_selection are their current replacements.
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# Log density of a diagonal Gaussian, parameterised by the standard deviation sigma
def log_gaussian(x, mu, sigma):
    return float(-0.5 * np.log(2 * np.pi) - np.log(np.abs(sigma))) - (x - mu)**2 / (2 * sigma**2)


# Same quantity, parameterised by log sigma (the variational posterior in forward() below
# uses softplus(logsigma) as its scale, whereas this density uses exp(logsigma))
def log_gaussian_logsigma(x, mu, logsigma):
    return float(-0.5 * np.log(2 * np.pi)) - logsigma - (x - mu)**2 / (2 * torch.exp(logsigma)**2)
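# Illustrative sanity check on scalars (not part of the original script):
# log N(0 | 0, 1) should equal -0.5 * log(2 * pi)
assert abs(log_gaussian(0., 0., 1.) - (-0.5 * np.log(2 * np.pi))) < 1e-6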
class MLPLayer(nn.Module):
    def __init__(self, n_input, n_output, sigma_prior):
        super(MLPLayer, self).__init__()
        self.n_input = n_input
        self.n_output = n_output
        self.sigma_prior = sigma_prior
        # Variational posterior parameters: a mean and a pre-softplus scale for every weight and bias
        self.W_mu = nn.Parameter(torch.Tensor(n_input, n_output).normal_(0, 0.01))
        self.W_logsigma = nn.Parameter(torch.Tensor(n_input, n_output).normal_(0, 0.01))
        self.b_mu = nn.Parameter(torch.Tensor(n_output).uniform_(-0.01, 0.01))
        self.b_logsigma = nn.Parameter(torch.Tensor(n_output).uniform_(-0.01, 0.01))
        self.lpw = 0
        self.lqw = 0

    def forward(self, X, infer=False):
        if infer:
            # At test time, use the posterior means instead of sampling the weights
            output = torch.mm(X, self.W_mu) + self.b_mu.expand(X.size()[0], self.n_output)
            return output
        # Reparameterisation trick: W = mu + softplus(logsigma) * epsilon
        epsilon_W, epsilon_b = self.get_random()
        W = self.W_mu + torch.log(1 + torch.exp(self.W_logsigma)) * epsilon_W
        b = self.b_mu + torch.log(1 + torch.exp(self.b_logsigma)) * epsilon_b
        output = torch.mm(X, W) + b.expand(X.size()[0], self.n_output)
        # Log prior p(w) and log variational posterior q(w|theta) evaluated at the sampled weights
        self.lpw = log_gaussian(W, 0, self.sigma_prior).sum() + log_gaussian(b, 0, self.sigma_prior).sum()
        self.lqw = log_gaussian_logsigma(W, self.W_mu, self.W_logsigma).sum() + log_gaussian_logsigma(b, self.b_mu, self.b_logsigma).sum()
        return output

    def get_random(self):
        # Noise for the reparameterisation (note it is drawn with std sigma_prior rather than 1)
        return (Variable(torch.Tensor(self.n_input, self.n_output).normal_(0, self.sigma_prior).cuda()),
                Variable(torch.Tensor(self.n_output).normal_(0, self.sigma_prior).cuda()))
class MLP(nn.Module):
    def __init__(self, n_input, sigma_prior):
        super(MLP, self).__init__()
        self.l1 = MLPLayer(n_input, 200, sigma_prior)
        self.l1_relu = nn.ReLU()
        self.l2 = MLPLayer(200, 200, sigma_prior)
        self.l2_relu = nn.ReLU()
        self.l3 = MLPLayer(200, 10, sigma_prior)
        self.l3_softmax = nn.Softmax(dim=1)

    def forward(self, X, infer=False):
        output = self.l1_relu(self.l1(X, infer))
        output = self.l2_relu(self.l2(output, infer))
        output = self.l3_softmax(self.l3(output, infer))
        return output

    def get_lpw_lqw(self):
        # Sum the per-layer log prior / log posterior terms from the most recent forward pass
        lpw = self.l1.lpw + self.l2.lpw + self.l3.lpw
        lqw = self.l1.lqw + self.l2.lqw + self.l3.lqw
        return lpw, lqw
def forward_pass_samples(X, y):
    # Monte Carlo estimate: average the log prior, log posterior and log likelihood over n_samples draws
    s_log_pw, s_log_qw, s_log_likelihood = 0., 0., 0.
    for _ in range(n_samples):
        output = net(X)
        sample_log_pw, sample_log_qw = net.get_lpw_lqw()
        sample_log_likelihood = log_gaussian(y, output, sigma_prior).sum()
        s_log_pw += sample_log_pw
        s_log_qw += sample_log_qw
        s_log_likelihood += sample_log_likelihood
    return s_log_pw / n_samples, s_log_qw / n_samples, s_log_likelihood / n_samples


def criterion(l_pw, l_qw, l_likelihood):
    # Minibatch-weighted variational free energy: (1/n_batches) * (log q - log p) - log likelihood
    return ((1. / n_batches) * (l_qw - l_pw) - l_likelihood).sum() / float(batch_size)
# fetch_mldata('MNIST original') was removed from scikit-learn; fetch_openml is the current equivalent
mnist = fetch_openml('mnist_784', as_frame=False)
N = 5000
data = mnist.data.astype(np.float32) / 255.
idx = np.random.choice(data.shape[0], N)
data = data[idx]
target = mnist.target[idx].astype(np.int32).reshape(N, 1)
train_idx, test_idx = train_test_split(np.array(range(N)), test_size=0.05)
train_data, test_data = data[train_idx], data[test_idx]
train_target, test_target = target[train_idx], target[test_idx]
# sparse_output requires scikit-learn >= 1.2; on older versions use sparse=False instead
train_target = np.float32(preprocessing.OneHotEncoder(sparse_output=False).fit_transform(train_target))
n_input = train_data.shape[1]
M = train_data.shape[0]
sigma_prior = float(np.exp(-3))
n_samples = 3
learning_rate = 0.001
n_epochs = 100
# Initialize network
net = MLP(n_input, sigma_prior)
net = net.cuda()
# building the objective
# remember, we're evaluating by samples
log_pw, log_qw, log_likelihood = 0., 0., 0.
batch_size = 100
n_batches = M / float(batch_size)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
n_train_batches = int(train_data.shape[0] / float(batch_size))
for e in range(n_epochs):
    errs = []
    for b in range(n_train_batches):
        net.zero_grad()
        X = Variable(torch.Tensor(train_data[b * batch_size: (b + 1) * batch_size]).cuda())
        y = Variable(torch.Tensor(train_target[b * batch_size: (b + 1) * batch_size]).cuda())
        log_pw, log_qw, log_likelihood = forward_pass_samples(X, y)
        loss = criterion(log_pw, log_qw, log_likelihood)
        errs.append(loss.data.cpu().numpy())
        loss.backward()
        optimizer.step()

    # Evaluate on the held-out split with the posterior mean weights (infer=True)
    X = Variable(torch.Tensor(test_data).cuda(), volatile=True)
    pred = net(X, infer=True)
    _, out = torch.max(pred, 1)
    acc = np.count_nonzero(np.squeeze(out.data.cpu().numpy()) == np.int32(test_target.ravel())) / float(test_data.shape[0])
    print('epoch', e, 'loss', np.mean(errs), 'acc', acc)
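# A minimal sketch (not part of the original script): Monte Carlo averaging of the predictive
# distribution. Sampling weights at test time (infer=False) and averaging the softmax outputs
# over several draws is one common way to use the learned posterior for predictive uncertainty.
X_test = Variable(torch.Tensor(test_data).cuda(), volatile=True)
mc_pred = sum(net(X_test).data for _ in range(n_samples)) / n_samples
_, mc_out = torch.max(mc_pred, 1)
mc_acc = np.count_nonzero(np.squeeze(mc_out.cpu().numpy()) == np.int32(test_target.ravel())) / float(test_data.shape[0])
print('MC-averaged test accuracy:', mc_acc)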