Wrap PyTorch objective functions so they can be optimised with scipy's optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html. (I also made a repo to do this, https://github.com/gngdb/pytorch-minimize, although I had forgotten about this gist at the time.)
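For context, scipy.optimize.minimize operates on a flat NumPy vector: it needs a callable fun(x) returning a scalar and, ideally, jac(x) returning the gradient as a vector of the same shape as x. A minimal standalone illustration of that interface (this toy quadratic is my own example, not part of the gist):

import numpy as np
from scipy import optimize

# minimize f(x) = ||x - 3||^2, whose gradient is 2 * (x - 3)
fun = lambda x: np.sum((x - 3.0) ** 2)
jac = lambda x: 2.0 * (x - 3.0)

res = optimize.minimize(fun, x0=np.zeros(5), method='BFGS', jac=jac)
print(res.x)  # approximately [3. 3. 3. 3. 3.]

The wrapper below exposes a PyTorch module's parameters and loss through exactly this interface.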
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from scipy import optimize
from obj import PyTorchObjective

from tqdm import tqdm

if __name__ == '__main__':
    # whatever this initialises to is our "true" W
    linear = nn.Linear(32, 32)
    linear = linear.eval()

    # input X
    N = 10000
    X = torch.Tensor(N, 32)
    X.uniform_(0., 1.)  # fill with uniform
    eps = torch.Tensor(N, 32)
    eps.normal_(0., 1e-4)

    # output Y
    with torch.no_grad():
        Y = linear(X)  # + eps

    # make module executing the experiment
    class Objective(nn.Module):
        def __init__(self):
            super(Objective, self).__init__()
            self.linear = nn.Linear(32, 32)
            self.linear = self.linear.train()
            self.X, self.Y = X, Y

        def forward(self):
            output = self.linear(self.X)
            return F.mse_loss(output, self.Y).mean()

    objective = Objective()

    maxiter = 100
    with tqdm(total=maxiter) as pbar:
        def verbose(xk):
            pbar.update(1)
        # try to optimize that function with scipy
        obj = PyTorchObjective(objective)
        xL = optimize.minimize(obj.fun, obj.x0, method='BFGS', jac=obj.jac,
                               callback=verbose,
                               options={'gtol': 1e-6, 'disp': True,
                                        'maxiter': maxiter})
        #xL = optimize.minimize(obj.fun, obj.x0, method='CG', jac=obj.jac)  # , options={'gtol': 1e-2})
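optimize.minimize returns its result in xL, so the fitted flat vector xL.x can be loaded back into the module using the wrapper's own unpack_parameters. The gist stops at the call above, but a sketch of that final step might look like this (the printout is just an illustration):

fitted = obj.unpack_parameters(xL.x)
objective.load_state_dict(fitted)
with torch.no_grad():
    print('final MSE:', objective().item())  # loss at the solution scipy returned

The PyTorchObjective wrapper imported above as obj is defined in the second file (presumably obj.py):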
import torch
from scipy import optimize
import torch.nn.functional as F
import math
import numpy as np
from functools import reduce
from collections import OrderedDict


class PyTorchObjective(object):
    """PyTorch objective function, wrapped to be called by scipy.optimize."""

    def __init__(self, obj_module):
        self.f = obj_module  # some pytorch module that produces a scalar loss
        # make an x0 from the parameters in this module
        parameters = OrderedDict(obj_module.named_parameters())
        self.param_shapes = {n: parameters[n].size() for n in parameters}
        # ravel and concatenate all parameters to make x0
        self.x0 = np.concatenate([parameters[n].data.numpy().ravel()
                                  for n in parameters])

    def unpack_parameters(self, x):
        """optimize.minimize supplies a 1D array; chop it up into one tensor per parameter."""
        i = 0
        named_parameters = OrderedDict()
        for n in self.param_shapes:
            param_len = reduce(lambda x, y: x * y, self.param_shapes[n])
            # slice out a section of this length
            param = x[i:i + param_len]
            # reshape according to this size, and cast to torch
            param = param.reshape(*self.param_shapes[n])
            named_parameters[n] = torch.from_numpy(param)
            # update index
            i += param_len
        return named_parameters

    def pack_grads(self):
        """Pack all the gradients from the parameters in the module into a numpy array."""
        grads = []
        for p in self.f.parameters():
            grad = p.grad.data.numpy()
            grads.append(grad.ravel())
        return np.concatenate(grads)

    def is_new(self, x):
        # if this is the first thing we've seen
        if not hasattr(self, 'cached_x'):
            return True
        else:
            # compare x to cached_x to determine if we've been given a new input
            x, self.cached_x = np.array(x), np.array(self.cached_x)
            error = np.abs(x - self.cached_x)
            return error.max() > 1e-8

    def cache(self, x):
        # unpack x and load into module
        state_dict = self.unpack_parameters(x)
        self.f.load_state_dict(state_dict)
        # store the raw array as well
        self.cached_x = x
        # zero the gradient
        self.f.zero_grad()
        # use it to calculate the objective
        obj = self.f()
        # backprop the objective
        obj.backward()
        self.cached_f = obj.item()
        self.cached_jac = self.pack_grads()

    def fun(self, x):
        # scalar objective for scipy; shares a cached forward/backward pass with jac
        if self.is_new(x):
            self.cache(x)
        return self.cached_f

    def jac(self, x):
        # gradient for scipy, taken from the same cached backward pass as fun
        if self.is_new(x):
            self.cache(x)
        return self.cached_jac
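A quick way to sanity-check that fun and jac stay consistent is scipy's finite-difference checker, scipy.optimize.check_grad. A sketch under my own assumptions (a tiny double-precision module so the finite-difference estimate is cheap and accurate; none of this is in the gist):

import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.optimize import check_grad

class TinyObjective(nn.Module):
    # small float64 problem: finite differences are both fast and accurate here
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4).double()
        self.x = torch.randn(8, 4, dtype=torch.float64)
        self.y = torch.randn(8, 4, dtype=torch.float64)

    def forward(self):
        return F.mse_loss(self.linear(self.x), self.y)

obj = PyTorchObjective(TinyObjective())
# check_grad returns the norm of the difference between jac and a
# finite-difference estimate of the gradient; it should be tiny
print(check_grad(obj.fun, obj.jac, obj.x0))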