andfoy · May 21, 2015 20:15
diff --git a/minimize.py b/minimize.py
 #This program is distributed WITHOUT ANY WARRANTY; without even the implied 
 #warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 #
 #
 #This file contains a Python version of Carl Rasmussen's Matlab-function 
 #minimize.m
 #
 #minimize.m is copyright (C) 1999 - 2006, Carl Edward Rasmussen.
 #Python adaptation by Roland Memisevic 2008.
 #
 #
 #The following is the original copyright notice that comes with the 
 #function minimize.m
 #(from http://www.kyb.tuebingen.mpg.de/bs/people/carl/code/minimize/Copyright):
 #
 #
 #"(C) Copyright 1999 - 2006, Carl Edward Rasmussen
 #
 #Permission is granted for anyone to copy, use, or modify these
 #programs and accompanying documents for purposes of research or
 #education, provided this copyright notice is retained, and note is
 #made of any changes that have been made.
 #
 #These programs and documents are distributed without any warranty,
 #express or implied.  As the programs were written for research
 #purposes only, they have not been tested to the degree that would be
 #advisable in any important application.  All use of these programs is
 #entirely at the user's own risk."


 """minimize.py 

 This module contains a function 'minimize' that performs unconstrained
 gradient based optimization using nonlinear conjugate gradients. 

 The function is a straightforward Python-translation of Carl Rasmussen's
 Matlab-function minimize.m

 """


 from numpy import dot, isinf, isnan, any, sqrt, isreal, real, nan, inf

 def minimize(f, X, maxnumlinesearch=None, maxnumfuneval=None, red=1.0, verbose=True):
    INT = 0.1;# don't reevaluate within 0.1 of the limit of the current bracket
    EXT = 3.0;              # extrapolate maximum 3 times the current step-size
    MAX = 20;                     # max 20 function evaluations per line search
    RATIO = 10;                                   # maximum allowed slope ratio
    SIG = 0.1;RHO = SIG/2;# SIG and RHO are the constants controlling the Wolfe-
    #Powell conditions. SIG is the maximum allowed absolute ratio between
    #previous and new slopes (derivatives in the search direction), thus setting
    #SIG to low (positive) values forces higher precision in the line-searches.
    #RHO is the minimum allowed fraction of the expected (from the slope at the
    #initial point in the linesearch). Constants must satisfy 0 < RHO < SIG < 1.
    #Tuning of SIG (depending on the nature of the function to be optimized) may
    #speed up the minimization; it is probably not worth playing much with RHO.

    SMALL = 10.**-16                    #minimize.m uses matlab's realmin 
    
    if maxnumlinesearch == None:
        if maxnumfuneval == None:
            raise "Specify maxnumlinesearch or maxnumfuneval"
        else:
            S = 'Function evaluation'
            length = maxnumfuneval
    else:
        if maxnumfuneval != None:
            raise "Specify either maxnumlinesearch or maxnumfuneval (not both)"
        else: 
            S = 'Linesearch'
            length = maxnumlinesearch

    i = 0                                         # zero the run length counter
    ls_failed = 0                          # no previous line search has failed
    f0, df0 = f(X)                          # get function value and gradient
    fX = [f0]
    i = i + (length<0)                                         # count epochs?!
    s = -df0; d0 = -dot(s,s)    # initial search direction (steepest) and slope
    x3 = red/(1.0-d0)                             # initial step is red/(|s|+1)

    while i < abs(length):                                 # while not finished
        i = i + (length>0)                                 # count iterations?!

        X0 = X; F0 = f0; dF0 = df0              # make a copy of current values
        if length>0:
            M = MAX
        else: 
            M = min(MAX, -length-i)
        while 1:                      # keep extrapolating as long as necessary
            x2 = 0; f2 = f0; d2 = d0; f3 = f0; df3 = df0
            success = 0
            while (not success) and (M > 0):
                try:
                    M = M - 1; i = i + (length<0)              # count epochs?!
                    f3,df3 = f(X+x3*s)
                    if isnan(f3) or isinf(f3) or any(isnan(df3)+isinf(df3)):
                        print "error"
                        return
                    success = 1
                except:                    # catch any error which occured in f
                    x3 = (x2+x3)/2                       # bisect and try again
            if f3 < F0:
                X0 = X+x3*s; F0 = f3; dF0 = df3   # keep best values
            d3 = dot(df3,s)                                         # new slope
            if d3 > SIG*d0 or f3 > f0+x3*RHO*d0 or M == 0:  
                                                   # are we done extrapolating?
                break
            x1 = x2; f1 = f2; d1 = d2                 # move point 2 to point 1
            x2 = x3; f2 = f3; d2 = d3                 # move point 3 to point 2
            A = 6*(f1-f2)+3*(d2+d1)*(x2-x1)          # make cubic extrapolation
            B = 3*(f2-f1)-(2*d1+d2)*(x2-x1)
            Z = B+sqrt(complex(B*B-A*d1*(x2-x1)))
            if Z != 0.0:
                x3 = x1-d1*(x2-x1)**2/Z              # num. error possible, ok!
            else: 
                x3 = inf
            if (not isreal(x3)) or isnan(x3) or isinf(x3) or (x3 < 0): 
                                                       # num prob | wrong sign?
                x3 = x2*EXT                        # extrapolate maximum amount
            elif x3 > x2*EXT:           # new point beyond extrapolation limit?
                x3 = x2*EXT                        # extrapolate maximum amount
            elif x3 < x2+INT*(x2-x1):  # new point too close to previous point?
                x3 = x2+INT*(x2-x1)
            x3 = real(x3)

        while (abs(d3) > -SIG*d0 or f3 > f0+x3*RHO*d0) and M > 0:  
                                                           # keep interpolating
            if (d3 > 0) or (f3 > f0+x3*RHO*d0):            # choose subinterval
                x4 = x3; f4 = f3; d4 = d3             # move point 3 to point 4
            else:
                x2 = x3; f2 = f3; d2 = d3             # move point 3 to point 2
            if f4 > f0:           
                x3 = x2-(0.5*d2*(x4-x2)**2)/(f4-f2-d2*(x4-x2))
                                                      # quadratic interpolation
            else:
                A = 6*(f2-f4)/(x4-x2)+3*(d4+d2)           # cubic interpolation
                B = 3*(f4-f2)-(2*d2+d4)*(x4-x2)
                if A != 0:
                    x3=x2+(sqrt(B*B-A*d2*(x4-x2)**2)-B)/A
                                                     # num. error possible, ok!
                else:
                    x3 = inf
            if isnan(x3) or isinf(x3):
                x3 = (x2+x4)/2      # if we had a numerical problem then bisect
            x3 = max(min(x3, x4-INT*(x4-x2)),x2+INT*(x4-x2))  
                                                       # don't accept too close
            f3,df3 = f(X+x3*s)
            if f3 < F0:
                X0 = X+x3*s; F0 = f3; dF0 = df3              # keep best values
            M = M - 1; i = i + (length<0)                      # count epochs?!
            d3 = dot(df3,s)                                         # new slope

        if abs(d3) < -SIG*d0 and f3 < f0+x3*RHO*d0:  # if line search succeeded
            X = X+x3*s; f0 = f3; fX.append(f0)               # update variables
            if verbose: print '%s %6i;  Value %4.6e\r' % (S, i, f0)
            s = (dot(df3,df3)-dot(df0,df3))/dot(df0,df0)*s - df3
                                                  # Polack-Ribiere CG direction
            df0 = df3                                        # swap derivatives
            d3 = d0; d0 = dot(df0,s)
            if d0 > 0:                             # new slope must be negative
                s = -df0; d0 = -dot(s,s)     # otherwise use steepest direction
            x3 = x3 * min(RATIO, d3/(d0-SMALL))     # slope ratio but max RATIO
            ls_failed = 0                       # this line search did not fail
        else:
            X = X0; f0 = F0; df0 = dF0              # restore best point so far
            if ls_failed or (i>abs(length)):# line search failed twice in a row
                break                    # or we ran out of time, so we give up
            s = -df0; d0 = -dot(s,s)                             # try steepest
            x3 = 1/(1-d0)                     
            ls_failed = 1                             # this line search failed
    if verbose: print "\n"
    return X, fX, i
	#This program is distributed WITHOUT ANY WARRANTY; without even the implied
	#warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	#
	#
	#This file contains a Python version of Carl Rasmussen's Matlab-function
	#minimize.m
	#
	#minimize.m is copyright (C) 1999 - 2006, Carl Edward Rasmussen.
	#Python adaptation by Roland Memisevic 2008.
	#
	#
	#The following is the original copyright notice that comes with the
	#function minimize.m
	#(from http://www.kyb.tuebingen.mpg.de/bs/people/carl/code/minimize/Copyright):
	#
	#
	#"(C) Copyright 1999 - 2006, Carl Edward Rasmussen
	#
	#Permission is granted for anyone to copy, use, or modify these
	#programs and accompanying documents for purposes of research or
	#education, provided this copyright notice is retained, and note is
	#made of any changes that have been made.
	#
	#These programs and documents are distributed without any warranty,
	#express or implied. As the programs were written for research
	#purposes only, they have not been tested to the degree that would be
	#advisable in any important application. All use of these programs is
	#entirely at the user's own risk."


	"""minimize.py

	This module contains a function 'minimize' that performs unconstrained
	gradient based optimization using nonlinear conjugate gradients.

	The function is a straightforward Python-translation of Carl Rasmussen's
	Matlab-function minimize.m

	"""


	from numpy import dot, isinf, isnan, any, sqrt, isreal, real, nan, inf

	def minimize(f, X, maxnumlinesearch=None, maxnumfuneval=None, red=1.0, verbose=True):
	INT = 0.1;# don't reevaluate within 0.1 of the limit of the current bracket
	EXT = 3.0; # extrapolate maximum 3 times the current step-size
	MAX = 20; # max 20 function evaluations per line search
	RATIO = 10; # maximum allowed slope ratio
	SIG = 0.1;RHO = SIG/2;# SIG and RHO are the constants controlling the Wolfe-
	#Powell conditions. SIG is the maximum allowed absolute ratio between
	#previous and new slopes (derivatives in the search direction), thus setting
	#SIG to low (positive) values forces higher precision in the line-searches.
	#RHO is the minimum allowed fraction of the expected (from the slope at the
	#initial point in the linesearch). Constants must satisfy 0 < RHO < SIG < 1.
	#Tuning of SIG (depending on the nature of the function to be optimized) may
	#speed up the minimization; it is probably not worth playing much with RHO.

	SMALL = 10.**-16 #minimize.m uses matlab's realmin

	if maxnumlinesearch == None:
	if maxnumfuneval == None:
	raise "Specify maxnumlinesearch or maxnumfuneval"
	else:
	S = 'Function evaluation'
	length = maxnumfuneval
	else:
	if maxnumfuneval != None:
	raise "Specify either maxnumlinesearch or maxnumfuneval (not both)"
	else:
	S = 'Linesearch'
	length = maxnumlinesearch

	i = 0 # zero the run length counter
	ls_failed = 0 # no previous line search has failed
	f0, df0 = f(X) # get function value and gradient
	fX = [f0]
	i = i + (length<0) # count epochs?!
	s = -df0; d0 = -dot(s,s) # initial search direction (steepest) and slope
	x3 = red/(1.0-d0) # initial step is red/(\|s\|+1)

	while i < abs(length): # while not finished
	i = i + (length>0) # count iterations?!

	X0 = X; F0 = f0; dF0 = df0 # make a copy of current values
	if length>0:
	M = MAX
	else:
	M = min(MAX, -length-i)
	while 1: # keep extrapolating as long as necessary
	x2 = 0; f2 = f0; d2 = d0; f3 = f0; df3 = df0
	success = 0
	while (not success) and (M > 0):
	try:
	M = M - 1; i = i + (length<0) # count epochs?!
	f3,df3 = f(X+x3*s)
	if isnan(f3) or isinf(f3) or any(isnan(df3)+isinf(df3)):
	print "error"
	return
	success = 1
	except: # catch any error which occured in f
	x3 = (x2+x3)/2 # bisect and try again
	if f3 < F0:
	X0 = X+x3*s; F0 = f3; dF0 = df3 # keep best values
	d3 = dot(df3,s) # new slope
	if d3 > SIGd0 or f3 > f0+x3RHO*d0 or M == 0:
	# are we done extrapolating?
	break
	x1 = x2; f1 = f2; d1 = d2 # move point 2 to point 1
	x2 = x3; f2 = f3; d2 = d3 # move point 3 to point 2
	A = 6(f1-f2)+3(d2+d1)*(x2-x1) # make cubic extrapolation
	B = 3(f2-f1)-(2d1+d2)*(x2-x1)
	Z = B+sqrt(complex(BB-Ad1*(x2-x1)))
	if Z != 0.0:
	x3 = x1-d1(x2-x1)*2/Z # num. error possible, ok!
	else:
	x3 = inf
	if (not isreal(x3)) or isnan(x3) or isinf(x3) or (x3 < 0):
	# num prob \| wrong sign?
	x3 = x2*EXT # extrapolate maximum amount
	elif x3 > x2*EXT: # new point beyond extrapolation limit?
	x3 = x2*EXT # extrapolate maximum amount
	elif x3 < x2+INT*(x2-x1): # new point too close to previous point?
	x3 = x2+INT*(x2-x1)
	x3 = real(x3)

	while (abs(d3) > -SIGd0 or f3 > f0+x3RHO*d0) and M > 0:
	# keep interpolating
	if (d3 > 0) or (f3 > f0+x3RHOd0): # choose subinterval
	x4 = x3; f4 = f3; d4 = d3 # move point 3 to point 4
	else:
	x2 = x3; f2 = f3; d2 = d3 # move point 3 to point 2
	if f4 > f0:
	x3 = x2-(0.5d2(x4-x2)*2)/(f4-f2-d2(x4-x2))
	# quadratic interpolation
	else:
	A = 6(f2-f4)/(x4-x2)+3(d4+d2) # cubic interpolation
	B = 3(f4-f2)-(2d2+d4)*(x4-x2)
	if A != 0:
	x3=x2+(sqrt(BB-Ad2(x4-x2)*2)-B)/A
	# num. error possible, ok!
	else:
	x3 = inf
	if isnan(x3) or isinf(x3):
	x3 = (x2+x4)/2 # if we had a numerical problem then bisect
	x3 = max(min(x3, x4-INT(x4-x2)),x2+INT(x4-x2))
	# don't accept too close
	f3,df3 = f(X+x3*s)
	if f3 < F0:
	X0 = X+x3*s; F0 = f3; dF0 = df3 # keep best values
	M = M - 1; i = i + (length<0) # count epochs?!
	d3 = dot(df3,s) # new slope

	if abs(d3) < -SIGd0 and f3 < f0+x3RHO*d0: # if line search succeeded
	X = X+x3*s; f0 = f3; fX.append(f0) # update variables
	if verbose: print '%s %6i; Value %4.6e\r' % (S, i, f0)
	s = (dot(df3,df3)-dot(df0,df3))/dot(df0,df0)*s - df3
	# Polack-Ribiere CG direction
	df0 = df3 # swap derivatives
	d3 = d0; d0 = dot(df0,s)
	if d0 > 0: # new slope must be negative
	s = -df0; d0 = -dot(s,s) # otherwise use steepest direction
	x3 = x3 * min(RATIO, d3/(d0-SMALL)) # slope ratio but max RATIO
	ls_failed = 0 # this line search did not fail
	else:
	X = X0; f0 = F0; df0 = dF0 # restore best point so far
	if ls_failed or (i>abs(length)):# line search failed twice in a row
	break # or we ran out of time, so we give up
	s = -df0; d0 = -dot(s,s) # try steepest
	x3 = 1/(1-d0)
	ls_failed = 1 # this line search failed
	if verbose: print "\n"
	return X, fX, i
No results found