Function minimizer using PyTorch and L-BFGS
import torch

# Let's minimize the function f(x, y) = (x - 50)**2 + (y - 100)**2
# We can tell from looking at the equation that the minimum is at (50, 100).
def func(params):
    x, y = params
    return (x - 50)**2 + (y - 100)**2
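
# As a quick sanity check, func should be exactly zero at the expected minimum.
# (This assert is purely illustrative; it plays no part in the optimization.)
assert func((50.0, 100.0)) == 0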
# Optionally, view what the surface looks like
# import numpy as np
# import matplotlib.pyplot as plt
# N = 100
# xi = np.linspace(-N, N, 1000)
# yi = np.linspace(-N, N, 1000)
# params = np.meshgrid(xi, yi)
# fig, ax = plt.subplots()
# ax.imshow(func(params), extent=(-N, N, N, -N))
# These are our initial guesses for x and y. The values in this tensor will be
# updated in place as the optimizer proceeds.
# requires_grad=True tells autograd to track operations on val so that gradients
# can be computed into val.grad during the optimization.
val = torch.tensor((1000., 800.), requires_grad=True)

# To run on the GPU, we would instead use:
# val = torch.tensor((1000., 800.), requires_grad=True, device='cuda')
# We will use the L-BFGS optimizer.
optimizer = torch.optim.LBFGS([val])

guesses = []  # records the parameter values the optimizer visits
losses = []   # records the losses - since we minimize towards f(x, y) = 0,
              # the loss is simply `output - target = f(val) - 0 = f(val)`
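
# L-BFGS exposes a few tuning knobs; one possible alternative configuration
# (illustrative values, not tuned) adds a strong Wolfe line search, which
# often makes L-BFGS more robust:
# optimizer = torch.optim.LBFGS([val], lr=1, max_iter=100,
#                               line_search_fn='strong_wolfe')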
# L-BFGS takes a closure that re-evaluates the function and returns the loss
def closure():
    optimizer.zero_grad()  # Zero the gradients. This is important, since
                           # backward() otherwise accumulates into val.grad.
    output = func(val)     # Evaluate the function at the current guess
    loss = output          # unnecessary line, just showing that the output is the
                           # loss, since we're finding the minimum where f(x, y) = 0
    loss.backward()        # Compute the gradients
    guesses.append(val.detach().clone())   # val changes in place, so we detach and
                                           # clone to record its current value
    losses.append(loss.detach().clone())   # same for the loss
    return loss
# "step" is a misnomer here. In contrast to other optimizers (e.g. Adam), the LBFGS does the entire optimization, not just one step. | |
# We hence to not need to do "for i in range(1000): optimizer.step()" | |
optimizer.step(closure) | |
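
# For harder problems, a common pattern (sketched here, not needed for this
# simple quadratic) is to call step() several times and stop once the loss is tiny:
# for _ in range(10):
#     loss = optimizer.step(closure)
#     if loss.item() < 1e-10:
#         break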
print(f"Minimum: {val[0]}, {val[1]}") | |
print(f"Number of steps: {len(guesses)}") | |
print(f"Guesses: {guesses}") | |
print(f"Losses: {losses}") |