Cartoon version of Jiawei's optimization problem.
""" | |
Cartoon version of Jiawei's optimization problem. | |
Created [2017-02-17 Fri] | |
""" | |
import numpy as np | |
from scipy.optimize import fmin_bfgs | |
import autograd | |
from autograd import numpy as np | |

def norm(w):
    return np.sqrt(np.sum(w*w))

def group_lasso(G, w):
    return np.sum([norm(w[g]) for g in G])

def prox(G, w, threshold):
    for g in G:
        z = norm(w[g])
        if z <= threshold:
            w[g] = 0
        else:
            w[g] *= (z - threshold) / z
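
# Note: `prox` is the proximal operator of threshold * sum_g ||w[g]||_2, i.e.
# block soft-thresholding: a group is zeroed if its norm is <= threshold, and
# otherwise shrunk toward zero by exactly threshold.  E.g. (illustrative values)
# with G = [[0, 1]], w = [3.0, 4.0], threshold = 1.0, the group norm is 5 and
# w becomes [2.4, 3.2].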

def l1_prox(w, threshold):
    z = np.abs(w)
    y = z <= threshold
    w[y] = 0
    w[~y] *= (z[~y] - threshold) / z[~y]
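
# Note: `l1_prox` is elementwise soft-thresholding, the proximal operator of
# threshold * ||w||_1.  E.g. (illustrative values) w = [3.0, -0.5] with
# threshold = 1.0 maps to [2.0, 0.0]: small entries are zeroed, large ones are
# shrunk by threshold while keeping their sign.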

def L(loss, G, beta, delta, c):
    gamma = np.log(1+np.exp(beta))
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            + c[1] * np.abs(beta).sum()
            + c[2] * group_lasso(G, delta)
            )

def L_no_regularizer(loss, beta, delta, c):
    gamma = np.log(1+np.exp(beta))
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            )
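
# Note: the two objectives are split on purpose.  gamma = log(1 + exp(beta)) is
# a softplus reparameterization that keeps the scale gamma nonnegative.  L is
# the full objective (smooth loss + gamma penalty + l1 on beta + group lasso on
# delta) and is what BFGS and subgradient descent optimize directly below;
# L_no_regularizer keeps only the smooth terms, so proximal gradient descent
# can take gradient steps on it and handle the nonsmooth penalties with the
# prox operators above.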

def main():
    np.random.seed(1234)
    D = 10
    c = np.random.uniform(0, 5, size=3)
    G = [[i] for i in range(D)]   # each coordinate of delta is its own group

    # Some arbitrary convex loss... here we pick a random quadratic.
    magic = np.ones(D)   # the unregularized optimum satisfies gamma * delta = magic.
    A = np.random.uniform(-1, 1, size=(D, D))
    A = A.dot(A.T)       # random PSD matrix

    def loss(Delta):
        diff = (Delta - magic)
        return np.dot(np.dot(diff.T, A), diff)

    def obj_reg(w):
        # full nonsmooth objective; w packs [beta, delta]
        return L(loss, G, w[:D], w[D:], c)

    def obj_smooth(w):
        # smooth part only; nonsmooth penalties are handled by prox updates
        return L_no_regularizer(loss, w[:D], w[D:], c)

    grad_reg = autograd.grad(obj_reg)
    grad_smooth = autograd.grad(obj_smooth)

    # Initialization
    w0 = np.random.uniform(-1, 1, size=2*D)

    if 1:
        print('#__________________________')
        print('# BFGS (scipy fmin_bfgs)')
        opt = fmin_bfgs(obj_reg, w0, fprime=grad_reg, disp=-1)
        #print(opt)

    if 1:
        print('#__________________________')
        print('# Proximal gradient descent')
        w = w0.copy()
        a = 0.001          # learning rate
        prox_freq = 1      # prox update frequency
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            # gradient step on the smooth part ...
            g = grad_smooth(w)
            w -= a * g
            # ... followed by prox steps on the nonsmooth penalties; thresholds
            # are scaled by prox_freq*a so that a prox applied every prox_freq
            # steps matches a per-step threshold of a*c.
            if t % prox_freq == 0:
                l1_prox(w[:D], prox_freq*a*c[1])    # l1 on beta
                prox(G, w[D:], prox_freq*a*c[2])    # group lasso on delta
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))
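
    # Note: the next block applies plain (sub)gradient descent to the full
    # nonsmooth objective; autograd returns a (sub)gradient of the l1 and group
    # terms at the points it visits, but unlike the proximal updates these
    # steps will not drive coordinates exactly to zero.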

    if 1:
        print('#__________________________')
        print('# Subgradient descent')
        w = w0.copy()
        a = 0.001          # learning rate
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            g = grad_reg(w)
            w -= a * g
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))

if __name__ == '__main__':
    main()