Created
June 26, 2019 20:56
-
-
Save yiboyang/9f4bb5075449146c831b2e5c574fc187 to your computer and use it in GitHub Desktop.
Algorithm demos for Numerical Optimization by Nocedal and Wright.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# active_set_for_convex_qp.py | |
# Yibo Yang | |
# 04/16/2019 | |
# Algorithm 16.3 (active-set method for convex QP) in Numerical Optimization | |
import numpy as np | |
from numpy.linalg import solve, lstsq | |
printing_count_from_1 = 1 # set to 1 to match textbook numbering; 0 to use python numbering | |
def equality_constrained_qp(G, c, A, b): | |
""" | |
Solve min_x q(x) = 1/2 x^T G x + x^T c, subject to Ax = b | |
Assuming Z^T G Z is positive definite. | |
Used for solving (16.39) subproblem. | |
:param G: nxn | |
:param c: | |
:param A: m x n, should have full row rank | |
:param b: m vec | |
:return: | |
""" | |
if len(A) > 0: | |
[m, n] = A.shape | |
K = np.zeros([m + n, m + n]) # KKT matrix | |
K[:n, :n] = G | |
K[n:, :n] = A | |
K[:n, n:] = -A.T | |
rhs = np.hstack([-c, b]) | |
sol = solve(K, rhs) | |
x = sol[:n] | |
lam = sol[n:] | |
else: # unconstrained; simply set gradient to zero | |
x, _, _, _ = lstsq(G, -c, rcond=None) # use lstsq instead of solve in case G is only positive semidefinite | |
return x | |
def active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0=None, max_its=100): | |
""" | |
Solve min_x q(x) = 1/2 x^T G x + x^T c, subject to A1 x = b1, A2 x >= b2 | |
:param G: | |
:param c: | |
:param A1: m1 x n | |
:param b1: | |
:param A2: m2 x n | |
:param b2: | |
:param x0: initial primal var (feasible) | |
:param W0: initial working set (by default empty) | |
:return: | |
""" | |
m1 = len(A1) | |
m2 = len(A2) | |
if W0 is None: | |
# active_ineq_cons_idx = list(np.where(A1 @ x0 == b1)[0]) # subset of 0, 1, ..., m1-1 | |
# eq_cons_idx = list(range(m1, m1 + m2)) # eq cons always active; m1, m1+1, ..., m1+m2-1 | |
# W0 = active_ineq_cons_idx + eq_cons_idx # by default initialize working set to all active constraints at x0 | |
W0 = [] | |
W = W0 | |
A12 = np.vstack([A1, A2]) # block matrix of all constraints Jacobian, [A1; A2] | |
b12 = np.hstack([b1, b2]) | |
x = x0 | |
for k in range(max_its): | |
print(f'iteration {k}') | |
print(f'x_k={x}') | |
print(f'W_k={[i+printing_count_from_1 for i in W]}') | |
# solve (16.39) to find p_k | |
g = G @ x + c | |
A_ = np.array([A12[i] for i in W]) | |
# A_ = np.matrix([A12[i] for i in W]) | |
b_ = np.zeros(len(A_)) | |
p = equality_constrained_qp(G, g, A_, b_) | |
print(f'p_k={p}') | |
map_idx_in_W_to_cons_id = {idx: i for (idx, i) in enumerate(W)} | |
if np.all(np.isclose(p, 0)): | |
# compute Larange multipliers \hat \lambda_i that satisfy (16.42) | |
try: | |
lam_hat = solve(A_.T, g) # (16.42) | |
except: | |
# if overdetermined (e.g., lone Lagrange multiplier), np.linalg.solve will throw | |
# "numpy.linalg.linalg.LinAlgError: Last 2 dimensions of the array must be square" | |
lam_hat, _, _, _ = lstsq(A_.T, g, rcond=None) | |
W_intersect_I = [i for i in W if i < m1] # cons ids in W that correspond to ineq cons | |
# lam_hat_ineq_cons = [(i, lam) in enumerate(lam_hat) if map_idx_in_W_to_cons_id[i] < m1] | |
lam_hat_I = [lam for (j, lam) in enumerate(lam_hat) if map_idx_in_W_to_cons_id[j] in W_intersect_I] | |
print(f'\hat lambda corresponding to inequality constraints={lam_hat_I}') | |
lam_hat_I = np.array(lam_hat_I) | |
if np.all(lam_hat_I >= 0): | |
print(f'\hat lambda corresponding to inequality constraints all positive; returning') | |
print(f'optimal x={x}') | |
return x | |
else: | |
j = W_intersect_I[np.argmin(lam_hat_I)] | |
W.remove(j) | |
print(f'removed constraint {j+printing_count_from_1} from working set') | |
else: # p_k != 0 | |
x_plus_p = x + p | |
feasible = True | |
for j in range(m1): | |
if j not in W and np.dot(A1[j], x_plus_p) < b1[j]: # only need to check ineq cons that're not in W | |
feasible = False | |
break | |
if feasible: # no blocking cons; alpha = 1 | |
print('no blocking constraint') | |
x = x_plus_p | |
else: | |
blocking_cons = None | |
alpha = np.inf | |
for i in range(m1 + m2): | |
ai_dot_p = np.dot(A12[i], p) # ai^T p | |
if i not in W and ai_dot_p < 0: | |
alpha_prev = alpha | |
alpha = min(alpha, (b12[i] - np.dot(A12[i], x)) / ai_dot_p) | |
if alpha < alpha_prev: | |
blocking_cons = i | |
x = x + alpha * p | |
W += [blocking_cons] | |
print(f'added blocking constraint {blocking_cons+printing_count_from_1} to working set') | |
print() | |
return x | |
dtype = 'float' | |
n = 2 # number of variables | |
# textbook example 16.4 | |
# G = np.array([[2, 0], [0, 2]], dtype=dtype) | |
# c = np.array([-2, -5], dtype=dtype) | |
# A1 = np.array([[1, -2], | |
# [-1, -2], | |
# [-1, 2], | |
# [1, 0], | |
# [0, 1]], dtype=dtype) | |
# b1 = np.array([-2, -6, -2, 0, 0], dtype=dtype) | |
# A2 = np.zeros([0, n], dtype=dtype) | |
# b2 = np.array([], dtype=dtype) | |
# | |
# x0 = np.array([2, 0], dtype=dtype) | |
# # res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0) | |
# | |
# # let initial working set be {3} in textbook (note Python counts from 0) | |
# W0 = [2] | |
# print(W0) | |
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0) | |
# | |
# # let initial working set be {5} in textbook (note Python counts from 0) | |
# W0 = [4] | |
# print(W0) | |
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0) | |
# | |
# # let initial working set be empty | |
# W0 = [] | |
# print(W0) | |
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0) | |
# problem 16.11 | |
G = np.array([[1, -1], [-1, 2]], dtype=dtype) | |
c = np.array([-2, -6], dtype=dtype) | |
A1 = np.array([[-1 / 2, -1 / 2], | |
[1, -2], | |
[1, 0], | |
[0, 1]], dtype=dtype) | |
b1 = np.array([-1, -2, 0, 0], dtype=dtype) | |
A2 = np.zeros([0, n], dtype=dtype) | |
b2 = np.array([], dtype=dtype) | |
x0 = np.array([1 / 2, 1 / 2], dtype=dtype) | |
print(f'starting at interior point x0={x0}') | |
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0) | |
print() | |
x0 = np.array([2, 0], dtype=dtype) | |
print(f'starting at a vertex x0={x0}') | |
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0) | |
print() | |
x0 = np.array([1, 0], dtype=dtype) | |
print(f'starting at a non-vertex boundary point of feasible set x0={x0}') | |
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0) | |
print() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# basic_interior_point.py | |
# Yibo Yang | |
# 05/01/2019 | |
# Solving problem 18.69, p. 562 of Numerical Optimization. Since this problem has no inequality constraints, there's | |
# no non-negativitiy condition to enforce in algorithm 19.1, and algorithm 19.1 (which directly solves the Newton-KKT | |
# system) reduces to algorithm 18.1, p. 532. | |
import numpy as np | |
from numpy.linalg import solve, lstsq, norm | |
def numerical_gradient(f, x, eps=1e-5): | |
# compute numerical gradient using centered finite-difference | |
x = np.asarray(x, dtype=float) | |
n = len(x) | |
grad = np.empty(n) | |
for i in range(n): | |
x[i] += eps | |
f1 = f(x) | |
x[i] -= 2 * eps | |
f2 = f(x) | |
grad[i] = (f1 - f2) / (2 * eps) | |
x[i] += eps # restore | |
return grad | |
# f = lambda x: np.sum(x ** 2) | |
# x = np.array([0., 1., 2.]) | |
# print(numerical_gradient(f, x)) | |
def solve_newton(L_hess, grad_f, A, c, x, lam): | |
# solve the Newton-KKT system | |
n = len(x) | |
m = len(lam) | |
mat = np.zeros([m + n, m + n]) | |
mat[:n, :n] = L_hess | |
mat[:n, -m:] = -A.T | |
mat[-m:, :n] = A | |
b = np.zeros([m + n]) | |
b[:n] = - grad_f + A.T @ lam | |
b[-m:] = -c | |
p = solve(mat, b) | |
p_x = p[:n] | |
p_lam = p[-m:] | |
return p_x, p_lam | |
# problem 18.69, p. 562 | |
x0 = np.array([-1.71, 1.59, 1.82, -0.763, -0.763]) | |
# x_opt = np.array([-1.8, 1.7, 1.9, -.8, -.8]) # not actual optimal; constraints not satisfied! | |
tol = 1e-10 | |
max_its = 50 | |
x = x0 | |
n = len(x) | |
m = 3 | |
lam = np.zeros(m) | |
def f(x): | |
return np.exp(np.prod(x)) - 0.5 * (x[0] ** 3 + x[1] ** 3 + 1) ** 2 | |
grad_f_symbolic = [None] * n | |
grad_f_symbolic[0] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[0] - 3 * x[0] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1) | |
grad_f_symbolic[1] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[1] - 3 * x[1] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1) | |
grad_f_symbolic[2] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[2] | |
grad_f_symbolic[3] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[3] | |
grad_f_symbolic[4] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[4] | |
grad_c_symbolic = [None] * m | |
grad_c_symbolic[0] = lambda x: 2 * x | |
grad_c_symbolic[1] = lambda x: np.array([0., x[2], x[1], -5 * x[4], -5 * x[3]]) | |
grad_c_symbolic[2] = lambda x: np.array([3 * x[0] ** 2, 3 * x[1] ** 2, 0, 0, 0.]) | |
for it in range(max_its): | |
print('iteration k =', it + 1) | |
print('x_k =', x) | |
print('f_k =', f(x)) | |
prod = np.prod(x) | |
grad_f = np.exp(prod) * prod / x | |
grad_f[0] -= 3 * x[0] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1) | |
grad_f[1] -= 3 * x[1] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1) | |
A = np.zeros([m, n]) | |
# A[0] = 2 * x | |
# A[1] = np.array([0, x[2], x[1], -5 * x[4], -5 * x[3]]) | |
# A[2] = np.array([3 * x[0] * 2, 3 * x[1] * 2, 0, 0, 0]) | |
for j in range(m): | |
A[j] = grad_c_symbolic[j](x) | |
c = np.zeros([m]) | |
c[0] = np.sum(x ** 2) - 10 | |
c[1] = x[1] * x[2] - 5 * x[3] * x[4] | |
c[2] = x[0] ** 3 + x[1] ** 3 + 1 | |
def dL_dxi(x, i): | |
res = 0 | |
for j in range(m): | |
res -= lam[j] * grad_c_symbolic[j](x)[i] | |
res += grad_f_symbolic[i](x) | |
return res | |
L_hess = np.empty([n, n]) | |
for i in range(n): | |
L_hess[i] = numerical_gradient(lambda x: dL_dxi(x, i), x) | |
p_x, p_lam = solve_newton(L_hess, grad_f, A, c, x, lam) | |
x_prev = x.copy() | |
x += p_x | |
lam += p_lam | |
if norm(x - x_prev) < tol: | |
print('||x_{k+1} - x_k|| < %g, terminating' % tol) | |
break | |
print() | |
print('final solution x=', x) | |
c = np.zeros([m]) | |
c[0] = np.sum(x ** 2) - 10 | |
c[1] = x[1] * x[2] - 5 * x[3] * x[4] | |
c[2] = x[0] ** 3 + x[1] ** 3 + 1 | |
for i in range(m): | |
print('c_%d(x) =' % (i + 1), c[i]) | |
############################################################# | |
# Execution Printout: | |
# iteration k = 1 | |
# x_k = [-1.71 1.59 1.82 -0.763 -0.763] | |
# f_k = 0.0559001518641 | |
# iteration k = 2 | |
# x_k = [-1.71644697 1.59488994 1.82858782 -0.76372206 -0.76372206] | |
# f_k = 0.0539464669897 | |
# iteration k = 3 | |
# x_k = [-1.71714261 1.59570867 1.82724803 -0.76364346 -0.76364346] | |
# f_k = 0.0539496822767 | |
# iteration k = 4 | |
# x_k = [-1.71714357 1.59570969 1.82724575 -0.76364308 -0.76364308] | |
# f_k = 0.0539498477699 | |
# ||x_{k+1} - x_k|| < 1e-10, terminating | |
# | |
# final solution x= [-1.71714357 1.59570969 1.82724575 -0.76364308 -0.76364308] | |
# c_1(x) = 3.5527136788e-15 | |
# c_2(x) = -8.881784197e-16 | |
# c_3(x) = 1.7763568394e-14 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment