Cartoon version of Jiawei's optimization problem.
""" | |
Cartoon version of Jiawei's optimization problem. | |
Created [2017-02-17 Fri] | |
""" | |
import numpy as np | |
from scipy.optimize import fmin_bfgs | |
import autograd | |
from autograd import numpy as np | |

def norm(w):
    return np.sqrt(np.sum(w*w))

def group_lasso(G, w):
    return np.sum([norm(w[g]) for g in G])

def prox(G, w, threshold):
    for g in G:
        z = norm(w[g])
        if z <= threshold:
            w[g] = 0
        else:
            w[g] *= (z - threshold) / z
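
# Note: `prox` is the proximal operator of threshold * sum_g ||w[g]||_2, i.e.
# block soft-thresholding: a group is zeroed if its norm is <= threshold, and
# otherwise shrunk toward zero by exactly threshold.  E.g. (illustrative values)
# with G = [[0, 1]], w = [3.0, 4.0], threshold = 1.0, the group norm is 5 and
# w becomes [2.4, 3.2].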

def l1_prox(w, threshold):
    z = np.abs(w)
    y = z <= threshold
    w[y] = 0
    w[~y] *= (z[~y] - threshold) / z[~y]
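
# Note: `l1_prox` is elementwise soft-thresholding, the proximal operator of
# threshold * ||w||_1.  E.g. (illustrative values) w = [3.0, -0.5] with
# threshold = 1.0 maps to [2.0, 0.0]: small entries are zeroed, large ones are
# shrunk by threshold while keeping their sign.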

def L(loss, G, beta, delta, c):
    gamma = np.log(1+np.exp(beta))
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            + c[1] * np.abs(beta).sum()
            + c[2] * group_lasso(G, delta)
            )

def L_no_regularizer(loss, beta, delta, c):
    gamma = np.log(1+np.exp(beta))
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            )
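
# Note: the two objectives are split on purpose.  gamma = log(1 + exp(beta)) is
# a softplus reparameterization that keeps the scale gamma nonnegative.  L is
# the full objective (smooth loss + gamma penalty + l1 on beta + group lasso on
# delta) and is what BFGS and subgradient descent optimize directly below;
# L_no_regularizer keeps only the smooth terms, so proximal gradient descent
# can take gradient steps on it and handle the nonsmooth penalties with the
# prox operators above.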

def main():
    np.random.seed(1234)
    D = 10
    c = np.random.uniform(0, 5, size=3)
    G = [[i] for i in range(D)]   # each coordinate of delta is its own group

    # Some arbitrary convex loss... here we pick a random quadratic.
    magic = np.ones(D)   # the unregularized optimum satisfies gamma * delta = magic.
    A = np.random.uniform(-1, 1, size=(D, D))
    A = A.dot(A.T)       # random PSD matrix

    def loss(Delta):
        diff = (Delta - magic)
        return np.dot(np.dot(diff.T, A), diff)

    def obj_reg(w):
        # full nonsmooth objective; w packs [beta, delta]
        return L(loss, G, w[:D], w[D:], c)

    def obj_smooth(w):
        # smooth part only; nonsmooth penalties are handled by prox updates
        return L_no_regularizer(loss, w[:D], w[D:], c)

    grad_reg = autograd.grad(obj_reg)
    grad_smooth = autograd.grad(obj_smooth)

    # Initialization
    w0 = np.random.uniform(-1, 1, size=2*D)

    if 1:
        print('#__________________________')
        print('# BFGS (scipy fmin_bfgs)')
        opt = fmin_bfgs(obj_reg, w0, fprime=grad_reg, disp=-1)
        #print(opt)

    if 1:
        print('#__________________________')
        print('# Proximal gradient descent')
        w = w0.copy()
        a = 0.001          # learning rate
        prox_freq = 1      # prox update frequency
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            # gradient step on the smooth part ...
            g = grad_smooth(w)
            w -= a * g
            # ... followed by prox steps on the nonsmooth penalties; thresholds
            # are scaled by prox_freq*a so that a prox applied every prox_freq
            # steps matches a per-step threshold of a*c.
            if t % prox_freq == 0:
                l1_prox(w[:D], prox_freq*a*c[1])    # l1 on beta
                prox(G, w[D:], prox_freq*a*c[2])    # group lasso on delta
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))
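
    # Note: the next block applies plain (sub)gradient descent to the full
    # nonsmooth objective; autograd returns a (sub)gradient of the l1 and group
    # terms at the points it visits, but unlike the proximal updates these
    # steps will not drive coordinates exactly to zero.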

    if 1:
        print('#__________________________')
        print('# Subgradient descent')
        w = w0.copy()
        a = 0.001          # learning rate
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            g = grad_reg(w)
            w -= a * g
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))

if __name__ == '__main__':
    main()