suminb · December 21, 2012 03:51
diff --git a/regularize.py b/regularize.py
 ## fitpoly_incomplete.Py
 # Partial script to help get started in ISTA 421/521 Homework 2

 #import sys
 #sys.path.reverse()

 import numpy as np
 import matplotlib.pyplot as plt
 #from scipy.optimize import minimize
 from scipy.optimize import minimize_scalar

 def K_fordCV(Lambda):
    
    male100 = np.asmatrix(np.genfromtxt('/Users/sumin/dev/masters/ista521/homework2/data/synthdata.csv', delimiter = ' ', dtype = None))
    
    x = male100[:, 0] # Olympic years
    t = male100[:, 1] # Winning times
    maxorder=4
    N=x.size
    K=10
    testx = np.linspace(x[0,0], x[x.size-1,0], 1001) # Large, independent test set
    testt = 5*testx**3 - testx**2 + testx + 150*np.random.randn(testx.shape[0], 1)
    # convert everything to matrices
    x = np.asmatrix(x)
    t = np.asmatrix(t)
    testx = np.asmatrix(testx).conj().transpose()
    testt = np.asmatrix(testt).conj().transpose()
    X = np.asmatrix(np.zeros(shape = (x.shape[0], maxorder + 1)))
    testX = np.asmatrix(np.zeros(shape = (testx.shape[0], maxorder + 1)))
    sizes = (N/K)*np.ones(K)
    csizes = np.zeros(sizes.shape[0] + 1)
    csizes[1:csizes.shape[0]] = sizes.cumsum()
    
    cv_loss = np.zeros((K, maxorder + 1))
        
    for k in range(maxorder + 1):
       # print k
        X[:, k] = np.power(x, k)
       #  print X
        testX[:, k] = np.power(testx, k)
       # print testX

        # print "start"
        for fold in range(K):
            # Partition the data
            # foldX contains the data for just one fold
            # trainX contains all other data
            
            foldX = X[csizes[fold]+1:csizes[fold+1]+1,0:k+1]
            trainX = np.copy(X[:, 0:k+1])
            trainX = np.asmatrix(np.delete(trainX, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
            foldt = t[csizes[fold]+1:csizes[fold+1]+1,:]
            traint = np.copy(t)
            traint = np.asmatrix(np.delete(traint, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
            # print "p"
            # print trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size)
            # print "a"
            # print trainX.conj().transpose()*trainX
            # print "b"
            # print N*Lambda*np.identity(trainX[0].size)
            # print trainX.conj().transpose()*trainX+N*Lambda
            # print "c"
            # print trainX[0].size
            # print k

            # print "d"
            # print trainX.conj().transpose()
            # w = np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda)*trainX.conj().transpose()*traint
            w =np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size))*trainX.conj().transpose()*traint
            fold_pred = foldX*w
            cv_loss[fold,k] = np.mean(np.power(fold_pred-foldt, 2))
        #    print cv_loss[fold,k]
      #  print np.log(cv_loss)
    a=np.mean(np.log(cv_loss),0)
    #print a
    return a[k]

 f = K_fordCV
 res = minimize_scalar(f, method='brent')

 print res
 result= res.x
 # print result

 male100 = np.asmatrix(np.genfromtxt('../data/olympics.csv', delimiter = ' ', dtype = None))

 x = male100[:, 0] # Olympic years
 t = male100[:, 1] # Winning times

 # The following rescales x for numerical reasons
 # (In hw2 you are asked to fit 4th order polynomials, which involves
 # taking the 4th power of the input x values; taking the 4th power
 # of years gets large!)
 x = x - x[0]
 x = x/4

 # x (and t) is currently a column of a matrix object
 # the following 'flattens' x into an array we can iterate over
 flatx = np.asarray(x).flatten()

 ################################################################
 ## Linear model

 # Note: You will notice in the following code that there appear
 # two be "two versions" of the input x values that we range
 # over: (1) x itself (the scaled olympic years) and
 #       (2) the pair of variables plotx and plotX.
 # The plotx and plotX are an approximation of running "continuously"
 # over the x-axis so that we can get a nice, smooth plot of
 # our linear model; this will be particularly noticable when
 # we plot higher-order polynomials

 # The following line creates a large number of equally spaced points
 # between just before the scaled Olympic years start and just after
 # they end.  This gives us a set of points to use to plot our model
 # so that it looks like a smooth curve.
 plotx = np.asmatrix(np.linspace(flatx[0]-2, flatx[-1]+2, (flatx[-1]-flatx[0]+4)/0.01 + 1)).conj().transpose()

 # The following code generalizes to nth-order polynomial linear models.
 # The variable 'model_order' determines the order of the polynomial.
 # The default value is set to 1 for a first-order polynomial, i.e., 
 # a line embedded in 2-dimensions
 model_order = 4 
 X_columns = model_order + 1

 # Create X (the 'design' matrix) of appropriate size for the input data
 X = np.asmatrix(np.zeros((x.shape[0], X_columns)))
 # Create the second set of values that will be used as the x values for making
 # a smooth plot of our model
 plotX = np.asmatrix(np.zeros((plotx.shape[0], X_columns)))
 for k in range(X_columns):
    # fill in the X matrix with the input values, to the appropriate polynomial power
    X[:,k] = np.power(x, k)
    # fill in the plotX matrix with the finer-grained x-axis values,
    # to the appropriate polynomial power
    plotX[:,k] = np.power(plotx, k)

 # Here's the key step!: create the matrix version of the normal equation(s)
 # The following python functions will be useful:
 #   np.linalg.inv(X) <-- returns the inverse of X
 #   X.transpose()    <-- transposes X
 #   X.conj()         <-- not required, but this makes your math safe in case
 #                        the input values include complex numbers
 #                        (you do this before transpose)
 #ap=np.matrix(np.linalg.inv( np.dot(X.transpose(),X) + N*result*np.identity(trainX[0].size) ))
 # w = np.dot(np.dot( ap,X.transpose() ),np.asarray(t))### FILL THIS IN

 a = np.dot(X.transpose(),X)
 N= t.size
 b= np.linalg.inv(a+N*result*np.identity(a[0].size))
 c= np.dot(X.transpose() , np.asarray(t))
 w = np.dot(b,c)

 print "w:"
 print w

 ## plt.close()

 # create figure 2
 plt.figure(0)
 # plot the input (x,t) data
 plt.scatter(np.asarray(x), np.asarray(t), edgecolor = 'b', color = 'w', marker = 'o')
 # plot the linear model itself as a smooth line
 plt.plot(plotx, plotX*w, color = 'r', linewidth = 2)
 # add labels to your figure!
 plt.xlabel('Olympic number (note, not year!)')
 plt.ylabel('Winning time')
 plt.title('Olympic data with best fit model')
 plt.show()
 # This last line may or may not be necessary to keep your matplotlib window open
 raw_input('Press <ENTER> to quit...')
	## fitpoly_incomplete.Py
	# Partial script to help get started in ISTA 421/521 Homework 2

	#import sys
	#sys.path.reverse()

	import numpy as np
	import matplotlib.pyplot as plt
	#from scipy.optimize import minimize
	from scipy.optimize import minimize_scalar

	def K_fordCV(Lambda):

	male100 = np.asmatrix(np.genfromtxt('/Users/sumin/dev/masters/ista521/homework2/data/synthdata.csv', delimiter = ' ', dtype = None))

	x = male100[:, 0] # Olympic years
	t = male100[:, 1] # Winning times
	maxorder=4
	N=x.size
	K=10
	testx = np.linspace(x[0,0], x[x.size-1,0], 1001) # Large, independent test set
	testt = 5testx3 - testx2 + testx + 150np.random.randn(testx.shape[0], 1)
	# convert everything to matrices
	x = np.asmatrix(x)
	t = np.asmatrix(t)
	testx = np.asmatrix(testx).conj().transpose()
	testt = np.asmatrix(testt).conj().transpose()
	X = np.asmatrix(np.zeros(shape = (x.shape[0], maxorder + 1)))
	testX = np.asmatrix(np.zeros(shape = (testx.shape[0], maxorder + 1)))
	sizes = (N/K)*np.ones(K)
	csizes = np.zeros(sizes.shape[0] + 1)
	csizes[1:csizes.shape[0]] = sizes.cumsum()

	cv_loss = np.zeros((K, maxorder + 1))

	for k in range(maxorder + 1):
	# print k
	X[:, k] = np.power(x, k)
	# print X
	testX[:, k] = np.power(testx, k)
	# print testX

	# print "start"
	for fold in range(K):
	# Partition the data
	# foldX contains the data for just one fold
	# trainX contains all other data

	foldX = X[csizes[fold]+1:csizes[fold+1]+1,0:k+1]
	trainX = np.copy(X[:, 0:k+1])
	trainX = np.asmatrix(np.delete(trainX, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
	foldt = t[csizes[fold]+1:csizes[fold+1]+1,:]
	traint = np.copy(t)
	traint = np.asmatrix(np.delete(traint, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
	# print "p"
	# print trainX.conj().transpose()trainX+NLambda*np.identity(trainX[0].size)
	# print "a"
	# print trainX.conj().transpose()*trainX
	# print "b"
	# print NLambdanp.identity(trainX[0].size)
	# print trainX.conj().transpose()trainX+NLambda
	# print "c"
	# print trainX[0].size
	# print k

	# print "d"
	# print trainX.conj().transpose()
	# w = np.linalg.inv(trainX.conj().transpose()trainX+NLambda)trainX.conj().transpose()traint
	w =np.linalg.inv(trainX.conj().transpose()trainX+NLambdanp.identity(trainX[0].size))trainX.conj().transpose()*traint
	fold_pred = foldX*w
	cv_loss[fold,k] = np.mean(np.power(fold_pred-foldt, 2))
	# print cv_loss[fold,k]
	# print np.log(cv_loss)
	a=np.mean(np.log(cv_loss),0)
	#print a
	return a[k]

	f = K_fordCV
	res = minimize_scalar(f, method='brent')

	print res
	result= res.x
	# print result

	male100 = np.asmatrix(np.genfromtxt('../data/olympics.csv', delimiter = ' ', dtype = None))

	x = male100[:, 0] # Olympic years
	t = male100[:, 1] # Winning times

	# The following rescales x for numerical reasons
	# (In hw2 you are asked to fit 4th order polynomials, which involves
	# taking the 4th power of the input x values; taking the 4th power
	# of years gets large!)
	x = x - x[0]
	x = x/4

	# x (and t) is currently a column of a matrix object
	# the following 'flattens' x into an array we can iterate over
	flatx = np.asarray(x).flatten()

	################################################################
	## Linear model

	# Note: You will notice in the following code that there appear
	# two be "two versions" of the input x values that we range
	# over: (1) x itself (the scaled olympic years) and
	# (2) the pair of variables plotx and plotX.
	# The plotx and plotX are an approximation of running "continuously"
	# over the x-axis so that we can get a nice, smooth plot of
	# our linear model; this will be particularly noticable when
	# we plot higher-order polynomials

	# The following line creates a large number of equally spaced points
	# between just before the scaled Olympic years start and just after
	# they end. This gives us a set of points to use to plot our model
	# so that it looks like a smooth curve.
	plotx = np.asmatrix(np.linspace(flatx[0]-2, flatx[-1]+2, (flatx[-1]-flatx[0]+4)/0.01 + 1)).conj().transpose()

	# The following code generalizes to nth-order polynomial linear models.
	# The variable 'model_order' determines the order of the polynomial.
	# The default value is set to 1 for a first-order polynomial, i.e.,
	# a line embedded in 2-dimensions
	model_order = 4
	X_columns = model_order + 1

	# Create X (the 'design' matrix) of appropriate size for the input data
	X = np.asmatrix(np.zeros((x.shape[0], X_columns)))
	# Create the second set of values that will be used as the x values for making
	# a smooth plot of our model
	plotX = np.asmatrix(np.zeros((plotx.shape[0], X_columns)))
	for k in range(X_columns):
	# fill in the X matrix with the input values, to the appropriate polynomial power
	X[:,k] = np.power(x, k)
	# fill in the plotX matrix with the finer-grained x-axis values,
	# to the appropriate polynomial power
	plotX[:,k] = np.power(plotx, k)

	# Here's the key step!: create the matrix version of the normal equation(s)
	# The following python functions will be useful:
	# np.linalg.inv(X) <-- returns the inverse of X
	# X.transpose() <-- transposes X
	# X.conj() <-- not required, but this makes your math safe in case
	# the input values include complex numbers
	# (you do this before transpose)
	#ap=np.matrix(np.linalg.inv( np.dot(X.transpose(),X) + Nresultnp.identity(trainX[0].size) ))
	# w = np.dot(np.dot( ap,X.transpose() ),np.asarray(t))### FILL THIS IN

	a = np.dot(X.transpose(),X)
	N= t.size
	b= np.linalg.inv(a+Nresultnp.identity(a[0].size))
	c= np.dot(X.transpose() , np.asarray(t))
	w = np.dot(b,c)

	print "w:"
	print w

	## plt.close()

	# create figure 2
	plt.figure(0)
	# plot the input (x,t) data
	plt.scatter(np.asarray(x), np.asarray(t), edgecolor = 'b', color = 'w', marker = 'o')
	# plot the linear model itself as a smooth line
	plt.plot(plotx, plotX*w, color = 'r', linewidth = 2)
	# add labels to your figure!
	plt.xlabel('Olympic number (note, not year!)')
	plt.ylabel('Winning time')
	plt.title('Olympic data with best fit model')
	plt.show()
	# This last line may or may not be necessary to keep your matplotlib window open
	raw_input('Press <ENTER> to quit...')
No results found