Last active
January 17, 2019 20:16
-
-
Save odow/58a3469c9119b58069ab708431426905 to your computer and use it in GitHub Desktop.
Create a NLPEvaluator from expressions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Calculus, MathOptInterface | |
const MOI = MathOptInterface | |
# ============================================================================== | |
# | |
# First we construct a simple subtype of MOI.AbstractNLPEvaluator. We use it | |
# to provide thin overloads for all the MOI NLP functions. | |
# | |
# ============================================================================== | |
struct NLPEvaluator{F, GradF, G, JacG, HessL} <: MOI.AbstractNLPEvaluator | |
objective_expr::Expr | |
objective::F | |
objective_gradient::GradF | |
constraint_expr::Vector{Expr} | |
constraint::G | |
jacobian_structure::Vector{Tuple{Int64, Int64}} | |
constraint_jacobian::JacG | |
hessian_lagrangian_structure::Vector{Tuple{Int64, Int64}} | |
hessian_lagrangian::HessL | |
end | |
MOI.eval_objective(n::NLPEvaluator, x) = n.objective(x) | |
function MOI.eval_constraint(n::NLPEvaluator, g, x) | |
n.constraint(g, x) | |
return | |
end | |
function MOI.eval_objective_gradient(n::NLPEvaluator, g, x) | |
n.objective_gradient(g, x) | |
return | |
end | |
MOI.jacobian_structure(n::NLPEvaluator) = n.jacobian_structure | |
function MOI.hessian_lagrangian_structure(n::NLPEvaluator) | |
return n.hessian_lagrangian_structure | |
end | |
function MOI.eval_constraint_jacobian(n::NLPEvaluator, J, x) | |
n.constraint_jacobian(J, x) | |
return | |
end | |
function MOI.eval_constraint_jacobian_product(n::NLPEvaluator, y, x, w) | |
J = zeros(length(MOI.jacobian_structure(n))) | |
MOI.eval_constraint_jacobian(n, J, x) | |
y .= J * w | |
return | |
end | |
function MOI.eval_constraint_jacobian_transpose_product(n::NLPEvaluator, y, x, w) | |
J = zeros(length(MOI.jacobian_structure(n))) | |
MOI.eval_constraint_jacobian(n, J, x) | |
y .= J' * w | |
return | |
end | |
function MOI.eval_hessian_lagrangian(n::NLPEvaluator, H, x, σ, μ) | |
return n.hessian_lagrangian(H, x, σ, μ) | |
end | |
function MOI.eval_hessian_lagrangian_product(n::NLPEvaluator, H, x, v, σ, μ) | |
H = zeros(length(MOI.hessian_structure(n))) | |
MOI.eval_hessian_lagrangian(n, H, x, σ, μ) | |
h .= H * v | |
return | |
end | |
MOI.objective_expr(n::NLPEvaluator) = n.objective_expr | |
MOI.constraint_expr(n::NLPEvaluator, i) = n.constraint_expr[i] | |
# ============================================================================== | |
# | |
# Now comes the tricky bit. We use Calculus.jl to symbolically differentiate | |
# expressions. We need to subsitute references into the array `x` with a | |
# unique symbol name, so expressions like `:(2x[1] + x[2])` become `:(2 * x_1 | |
# + x_2)`. We undo this at the end. | |
# | |
# ============================================================================== | |
""" | |
walk_replace_expression(replace_function, expr, args...) | |
Walk the arguments of `expr`, replacing them with | |
`replace_function(ex, args...)`. | |
""" | |
function walk_replace_expression(replace_function, expr, args...) | |
for (i, ex) in enumerate(expr.args) | |
expr.args[i] = replace_function(ex, args...) | |
end | |
return expr | |
end | |
""" | |
replace_ref(expr, names::Dict{Symbol, Int}) | |
Walk an expression graph replacing instances of :(x[i]) with a unique symbol. | |
Store a mapping of the symbol to the index `i` in `names`. | |
""" | |
function replace_ref(expr::Expr, names::Dict{Symbol, Int}) | |
if expr.head == :ref | |
# A reference we want to replace. Not that only `:(x[i])` can exist by | |
# as inputs to this function, i.e, not `:(y[i])`. | |
@assert expr.args[1] == :x | |
name = Symbol("MathOptFormat" * join(expr.args, "_")) | |
if !haskey(names, name) | |
names[name] = expr.args[2] | |
end | |
return name | |
else | |
return walk_replace_expression(replace_ref, expr, names) | |
end | |
end | |
# Fallback for anything else. | |
replace_ref(expr::Any, names::Dict{Symbol, Int}) = expr | |
""" | |
replace_ref(expr, names::Dict{Symbol, Int}) | |
Walk an expression graph replacing instances of symbols in `names` with the | |
corresponding `:(x[i])` where i is the value of the mapping in `names`. | |
""" | |
function reverse_replace_ref(expr::Symbol, names::Dict{Symbol, Int}) | |
return haskey(names, expr) ? Expr(:ref, :x, names[expr]) : expr | |
end | |
# Walk the arguments of an expression. | |
function reverse_replace_ref(expr::Expr, names::Dict{Symbol, Int}) | |
return walk_replace_expression(reverse_replace_ref, expr, names) | |
end | |
# Fallback for numbers, etc. | |
reverse_replace_ref(expr::Any, names::Dict{Symbol, Int}) = expr | |
""" | |
symbolically_differentiate(expr) | |
Symbolically differentiate `expr` using Calculus.jl and return expressions for | |
the first and second order derivatives. | |
""" | |
function symbolically_differentiate(expr) | |
zeroth = copy(expr) | |
# Process the expression to remove references into an array. We undo this | |
# operation at the end of the function. This is needed for Calculus.jl. | |
names = Dict{Symbol, Int}() | |
replace_ref(zeroth, names) | |
N = length(names) | |
# Extract an ordered list of the names. | |
ordered_names = Vector{Symbol}(undef, N) | |
for (name, index) in names | |
ordered_names[index] = name | |
end | |
# Construct first order derivative, simplify, and undo the name | |
# substitution. | |
first_order = Calculus.differentiate(zeroth, ordered_names) | |
first_order = reverse_replace_ref.( | |
Calculus.simplify.(first_order), Ref(names) | |
) | |
# Construct second order derivatives, transform into a matrix, simplify, and | |
# undo the name substitution. | |
second_order = Calculus.differentiate.(first_order, Ref(ordered_names)) | |
second_order = [ | |
reverse_replace_ref(Calculus.simplify(second_order[i][j]), names) | |
for i in 1:N, j in 1:N | |
] | |
# Return the first and second order derivatives. | |
return first_order, second_order | |
end | |
# ============================================================================== | |
# | |
# Now we convert expressions (scalar and vector) into functions that we can | |
# call. We utilize two main features: that all of the variables are named | |
# `:(x[i])`, # and that no other symbols other than Julia functions exist. | |
# (Especially not functions called `:g`.) We can rely on this because | |
# MathOptFormat only supports a defined set of functions, and anything else is | |
# invalid. | |
# | |
# ============================================================================== | |
# A simple expression. Note that the only symbol in expr can be :x, so we're | |
# safe to evaluate this in global scope with @eval without consequence. | |
expr_to_function(expr::Any) = @eval (x) -> $expr | |
# We can also unpack a vector of expressions safely because we know they can | |
# only contain :x and not :g! Note how we unpack the loop so that there are just | |
# |expr| statements in the function! It's super fast and non-allocating! | |
function expr_to_function(expr::Vector{<:Any}) | |
body_expr = Expr(:block) | |
for i in 1:length(expr) | |
push!(body_expr.args, :(g[$i] = $(expr[i]))) | |
end | |
return @eval (g, x) -> begin | |
$body_expr | |
return | |
end | |
end | |
function construct_hessian_lagrangian( | |
num_constraints::Int, | |
hessian_structure::Vector{Tuple{Int, Int}}, | |
hessian_expressions::Dict{Tuple{Int, Int, Int}, Union{Real, Expr}}) | |
body_expr = quote | |
H .= 0.0 | |
end | |
for (i, (row, col)) in enumerate(hessian_structure) | |
if haskey(hessian_expressions, (row, col, 0)) | |
push!(body_expr.args, | |
:(H[$i] = σ * $(hessian_expressions[(row, col, 0)])) | |
) | |
end | |
for constraint_index in 1:num_constraints | |
if haskey(hessian_expressions, (row, col, constraint_index)) | |
push!(body_expr.args, :(H[$i] = μ[$(constraint_index)] * | |
$(hessian_expressions[(row, col, constraint_index)]) | |
)) | |
end | |
end | |
end | |
return @eval (H, x, σ, μ) -> begin | |
$body_expr | |
return | |
end | |
end | |
# ============================================================================== | |
# | |
# Here is the bulk of the logic. We take an objective expression and a vector | |
# of constraint expressions, and return a NLPEvaluator. | |
# | |
# ============================================================================== | |
function exprs_to_nlp_evaluator(objective::Expr, constraints::Vector{Expr}) | |
constraint_expressions = Expr[] | |
jacobian_structure = Tuple{Int64, Int64}[] | |
jacobian_expressions = Expr[] | |
hessian_expressions = Dict{Tuple{Int, Int, Int}, Union{Real, Expr}}() | |
obj_grad, obj_hess = diff(objective) | |
for col in 1:size(obj_hess, 2) | |
for row in 1:size(obj_hess, 1) | |
simple_expr = Calculus.simplify(obj_hess[row, col]) | |
if simple_expr != 0.0 | |
hessian_expressions[(row, col, 0)] = simple_expr | |
end | |
end | |
end | |
for (row, constraint) in enumerate(constraints) | |
constraint_expr = constraint.args[2] | |
push!(constraint_expressions, constraint_expr) | |
con_grad, con_hess = diff(constraint_expr) | |
for (col, expr) in enumerate(con_grad) | |
simple_expr = Calculus.simplify(expr) | |
if simple_expr != 0.0 | |
push!(jacobian_structure, (row, col)) | |
push!(jacobian_expressions, simple_expr) | |
end | |
end | |
for col_hess in 1:size(con_hess, 2) | |
for row_hess in 1:size(con_hess, 1) | |
simple_expr = Calculus.simplify(con_hess[row_hess, col_hess]) | |
if simple_expr != 0.0 | |
hessian_expressions[(row_hess, col_hess, row)] = simple_expr | |
end | |
end | |
end | |
end | |
hessian_structure = unique( | |
(r, c) for (r, c, k) in keys(hessian_expressions) | |
) | |
sort!(hessian_structure) | |
return NLPEvaluator( | |
objective, | |
expr_to_function(objective), | |
expr_to_function(obj_grad), | |
constraints, | |
expr_to_function(constraint_expressions), | |
jacobian_structure, | |
expr_to_function(jacobian_expressions), | |
hessian_structure, | |
construct_hessian_lagrangian( | |
length(constraints), hessian_structure, hessian_expressions) | |
) | |
end | |
using Test | |
nlp_evaluator = exprs_to_nlp_evaluator( | |
:(x[1] * x[4] * (x[1] + x[2] + x[3]) + x[3]), | |
[ | |
:(x[1] * x[2] * x[3] * x[4] >= 25), | |
:(x[1]^2 + x[2]^2 + x[3]^2 + x[4]^2 == 40) | |
] | |
) | |
@test @inferred MOI.eval_objective(nlp_evaluator, [1.0, 2.0, 3.0, 4.0]) == 27.0 | |
g = zeros(Float64, 4) | |
MOI.eval_objective_gradient(nlp_evaluator, g, [1.0, 2.0, 3.0, 4.0]) | |
@test g == [28.0, 4.0, 5.0, 6.0] | |
g = zeros(Float64, 2) | |
MOI.eval_constraint(nlp_evaluator, g, [1.0, 2.0, 3.0, 4.0]) | |
@test g == [24.0, 30.0] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment