Created
October 13, 2023 09:38
-
-
Save ojwoodford/4d14f1bb79d551909c332f0b93ed1308 to your computer and use it in GitHub Desktop.
Experiment demonstrating the effect of a non-zero optimal residual on Levenberg-Marquardt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using StaticArrays, GLMakie, LinearAlgebra, Static | |
import NLLSsolver | |
# Magnitude of the constant extra residual element in RosenbrockB below; it makes
# the optimal cost non-zero (≈ λ²/2) without moving the location of the optimum.
const λ = 10.0
# Define the Rosenbrock cost function as NLLSsolver residuals
# Standard 2D Rosenbrock cost expressed as a 2-vector residual
# r(p) = [p₁ - 1, 10(p₁² - p₂)], so cost = ½‖r‖² is zero at the optimum (1, 1).
struct RosenbrockA <: NLLSsolver.AbstractResidual end
Base.eltype(::RosenbrockA) = Float64
NLLSsolver.ndeps(::RosenbrockA) = static(1)       # depends on a single variable
NLLSsolver.nres(::RosenbrockA) = static(2)        # residual is a 2-vector
NLLSsolver.varindices(::RosenbrockA) = SVector(1) # index of that variable in the problem
NLLSsolver.getvars(::RosenbrockA, vars::Vector) = (vars[1]::NLLSsolver.EuclideanVector{2, Float64},)
NLLSsolver.computeresidual(::RosenbrockA, p) = SVector(p[1] - 1, 10 * (p[1] ^ 2 - p[2]))
# Same Rosenbrock residual as RosenbrockA, augmented with a constant third element λ.
# The constant cannot be reduced by the optimizer, so the optimal cost is non-zero
# (≈ λ²/2) while the optimum location (1, 1) is unchanged.
struct RosenbrockB <: NLLSsolver.AbstractResidual end
Base.eltype(::RosenbrockB) = Float64
NLLSsolver.ndeps(::RosenbrockB) = static(1)       # depends on a single variable
NLLSsolver.nres(::RosenbrockB) = static(3)        # residual is a 3-vector
NLLSsolver.varindices(::RosenbrockB) = SVector(1) # index of that variable in the problem
NLLSsolver.getvars(::RosenbrockB, vars::Vector) = (vars[1]::NLLSsolver.EuclideanVector{2, Float64},)
NLLSsolver.computeresidual(::RosenbrockB, p) = SVector(p[1] - 1, 10 * (p[1] ^ 2 - p[2]), convert(eltype(p), λ))
"""
    constructrosenbrockprobs()

Return a tuple `(problemA, problemB)` of `NLLSProblem`s, one per residual type,
each with a single 2D Euclidean variable initialized at the origin.
"""
function constructrosenbrockprobs()
    # Build each problem independently so the two variable vectors are not aliased
    # (runoptimizers! mutates problem.variables in place).
    makeproblem(::Type{R}) where {R} = NLLSsolver.NLLSProblem{NLLSsolver.EuclideanVector{2, Float64}, R}(
        [NLLSsolver.EuclideanVector(0., 0.)],
        NLLSsolver.VectorRepo{R}(R => [R()]))
    return makeproblem(RosenbrockA), makeproblem(RosenbrockB)
end
"""
    computeCostGrid(costs, X, Y)

Evaluate the total cost of `costs` at every point of the grid `X × Y`.

Returns a `Matrix{Float64}` of size `(length(X), length(Y))` with
`grid[a, b]` = cost at `(X[a], Y[b])` — the layout Makie's `heatmap!`/`contour!`
expect for `heatmap!(ax, X, Y, grid)`.
"""
function computeCostGrid(costs, X, Y)
    # Bug fix: the matrix was allocated (length(Y), length(X)) — transposed relative
    # to the grid[a, b] indexing below. It only worked because the demo grid is square.
    grid = Matrix{Float64}(undef, length(X), length(Y))
    # Single-element variable vector reused for every evaluation to avoid allocation.
    vars = [SVector(0.0, 0.0)]
    for (b, y) in enumerate(Y)
        for (a, x) in enumerate(X)
            vars[1] = SVector(x, y)
            grid[a, b] = NLLSsolver.cost(vars, costs)
        end
    end
    return grid
end
# Results of one optimizer run, stored as Makie observables so that plots
# referencing them update automatically when the data is replaced.
struct OptimResult
    costs::Observable{Vector{Float64}}       # start cost followed by the cost after each iteration
    trajectory::Observable{Vector{Point2f}}  # sequence of visited (x, y) points, starting at the start point
end
# Default-construct with empty, concretely-typed vectors.
OptimResult() = OptimResult(Observable(Float64[]), Observable(Point2f[]))
# Optimize every problem from the same 2D start point, filling in `results`
# (a vector of OptimResult, one per problem) with the cost history and the
# spatial trajectory of each run, then notify the observables so plots refresh.
function runoptimizers!(results, problems, start)
    options = NLLSsolver.NLLSOptions()
    # Callback sink reused across problems; emptied at the end of each iteration below.
    costtrajectory = NLLSsolver.CostTrajectory()
    for (ind, problem) in enumerate(problems)
        # Set the start point (overwrites the problem's single variable in place)
        problem.variables[1] = NLLSsolver.EuclideanVector(start[1], start[2])
        # Optimize the cost, recording per-iteration costs and steps via the callback
        result = NLLSsolver.optimize!(problem, options, nothing, NLLSsolver.storecostscallback(costtrajectory))
        # Construct the trajectory: the recorded steps are deltas, so accumulate
        # them from the start point (entry 1) to recover absolute positions.
        resize!(results[ind].trajectory.val, length(costtrajectory.trajectory)+1)
        @inbounds results[ind].trajectory.val[1] = Point2f(start[1], start[2])
        for (i, step) in enumerate(costtrajectory.trajectory)
            @inbounds results[ind].trajectory.val[i+1] = results[ind].trajectory.val[i] + Point2f(step[1], step[2])
        end
        # Set the costs: start cost first, then per-iteration costs, clamped to a
        # tiny positive floor so they remain plottable on a log10 axis.
        resize!(results[ind].costs.val, length(costtrajectory.costs)+1)
        @inbounds results[ind].costs.val[1] = result.startcost
        @inbounds results[ind].costs.val[2:end] .= max.(costtrajectory.costs, 1.e-38)
        empty!(costtrajectory)
    end
    # Subtract the constant cost floor λ²/2 of the augmented problem (second entry)
    # so both curves decay toward the same level on the log plot.
    results[2].costs.val .= max.(results[2].costs.val .- (λ ^ 2 / 2), 1.e-38)
    # Data was mutated through .val, so notify observers explicitly to trigger redraws.
    for res in results
        notify(res.costs)
        notify(res.trajectory)
    end
end
"""
    optimize2DProblem(problems, start, xrange, yrange)

Optimize each 2D `problem` from `start`, then display an interactive figure:
a heatmap/contour of the (log-shifted) cost surface of `problems[1]` with the
optimizer trajectories overlaid, and the per-iteration costs on a log10 axis.
Left-clicking the surface re-runs the optimizers from the clicked point.
Returns the Makie `Figure`.
"""
function optimize2DProblem(problems, start, xrange, yrange)
    # Compute the results (one OptimResult per problem)
    results = [OptimResult() for _ in problems]  # was range(1, length(problems)); iterate directly
    runoptimizers!(results, problems, start)
    # Compute costs over a grid, shifted and log-compressed for display contrast
    grid = computeCostGrid(problems[1].costs, xrange, yrange)
    minval = minimum(grid)
    grid = map(x -> log1p(x - minval), grid)
    # Create the plot
    GLMakie.activate!(inline=false)
    fig = Figure()
    ax1 = Axis(fig[1, 1]; limits=(xrange[1], xrange[end], yrange[1], yrange[end]), title="Trajectories")
    ax2 = Axis(fig[1, 2]; title="Costs", xlabel="Iteration", yscale=log10)
    heatmap!(ax1, xrange, yrange, grid)
    contour!(ax1, xrange, yrange, grid, linewidth=2, color=:white, levels=10)
    # Plot the trajectory and costs
    colors = [:navy, :red]
    labels = ["Standard", "Augmented"]
    for ind in eachindex(problems)  # idiomatic; also safe for non-1-based containers
        color = colors[mod1(ind, length(colors))]  # mod1 replaces the manual mod(ind-1, n)+1 cycling
        scatterlines!(ax1, results[ind].trajectory, color=color, linewidth=2.5)
        scatterlines!(ax2, results[ind].costs, color=color, linewidth=1.5, label=labels[ind])
    end
    axislegend(ax2)
    # Allow start points to be selected with a left click on the cost surface
    on(events(ax1).mousebutton) do event
        if event.button == Mouse.left && event.action == Mouse.press
            runoptimizers!(results, problems, mouseposition(ax1.scene))
        end
    end
    return fig
end
# Run the demo: optimize both problems from (-0.5, 2.5) over a 1000×1000 grid on [-1.5, 3]².
optimize2DProblem(constructrosenbrockprobs(), [-0.5, 2.5], range(-1.5, 3.0, 1000), range(-1.5, 3.0, 1000))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment