Created
December 21, 2012 03:51
-
-
Save suminb/4350568 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## fitpoly_incomplete.Py | |
| # Partial script to help get started in ISTA 421/521 Homework 2 | |
| #import sys | |
| #sys.path.reverse() | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| #from scipy.optimize import minimize | |
| from scipy.optimize import minimize_scalar | |
| def K_fordCV(Lambda): | |
| male100 = np.asmatrix(np.genfromtxt('/Users/sumin/dev/masters/ista521/homework2/data/synthdata.csv', delimiter = ' ', dtype = None)) | |
| x = male100[:, 0] # Olympic years | |
| t = male100[:, 1] # Winning times | |
| maxorder=4 | |
| N=x.size | |
| K=10 | |
| testx = np.linspace(x[0,0], x[x.size-1,0], 1001) # Large, independent test set | |
| testt = 5*testx**3 - testx**2 + testx + 150*np.random.randn(testx.shape[0], 1) | |
| # convert everything to matrices | |
| x = np.asmatrix(x) | |
| t = np.asmatrix(t) | |
| testx = np.asmatrix(testx).conj().transpose() | |
| testt = np.asmatrix(testt).conj().transpose() | |
| X = np.asmatrix(np.zeros(shape = (x.shape[0], maxorder + 1))) | |
| testX = np.asmatrix(np.zeros(shape = (testx.shape[0], maxorder + 1))) | |
| sizes = (N/K)*np.ones(K) | |
| csizes = np.zeros(sizes.shape[0] + 1) | |
| csizes[1:csizes.shape[0]] = sizes.cumsum() | |
| cv_loss = np.zeros((K, maxorder + 1)) | |
| for k in range(maxorder + 1): | |
| # print k | |
| X[:, k] = np.power(x, k) | |
| # print X | |
| testX[:, k] = np.power(testx, k) | |
| # print testX | |
| # print "start" | |
| for fold in range(K): | |
| # Partition the data | |
| # foldX contains the data for just one fold | |
| # trainX contains all other data | |
| foldX = X[csizes[fold]+1:csizes[fold+1]+1,0:k+1] | |
| trainX = np.copy(X[:, 0:k+1]) | |
| trainX = np.asmatrix(np.delete(trainX, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0)) | |
| foldt = t[csizes[fold]+1:csizes[fold+1]+1,:] | |
| traint = np.copy(t) | |
| traint = np.asmatrix(np.delete(traint, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0)) | |
| # print "p" | |
| # print trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size) | |
| # print "a" | |
| # print trainX.conj().transpose()*trainX | |
| # print "b" | |
| # print N*Lambda*np.identity(trainX[0].size) | |
| # print trainX.conj().transpose()*trainX+N*Lambda | |
| # print "c" | |
| # print trainX[0].size | |
| # print k | |
| # print "d" | |
| # print trainX.conj().transpose() | |
| # w = np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda)*trainX.conj().transpose()*traint | |
| w =np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size))*trainX.conj().transpose()*traint | |
| fold_pred = foldX*w | |
| cv_loss[fold,k] = np.mean(np.power(fold_pred-foldt, 2)) | |
| # print cv_loss[fold,k] | |
| # print np.log(cv_loss) | |
| a=np.mean(np.log(cv_loss),0) | |
| #print a | |
| return a[k] | |
| f = K_fordCV | |
| res = minimize_scalar(f, method='brent') | |
| print res | |
| result= res.x | |
| # print result | |
| male100 = np.asmatrix(np.genfromtxt('../data/olympics.csv', delimiter = ' ', dtype = None)) | |
| x = male100[:, 0] # Olympic years | |
| t = male100[:, 1] # Winning times | |
| # The following rescales x for numerical reasons | |
| # (In hw2 you are asked to fit 4th order polynomials, which involves | |
| # taking the 4th power of the input x values; taking the 4th power | |
| # of years gets large!) | |
| x = x - x[0] | |
| x = x/4 | |
| # x (and t) is currently a column of a matrix object | |
| # the following 'flattens' x into an array we can iterate over | |
| flatx = np.asarray(x).flatten() | |
| ################################################################ | |
| ## Linear model | |
| # Note: You will notice in the following code that there appear | |
| # two be "two versions" of the input x values that we range | |
| # over: (1) x itself (the scaled olympic years) and | |
| # (2) the pair of variables plotx and plotX. | |
| # The plotx and plotX are an approximation of running "continuously" | |
| # over the x-axis so that we can get a nice, smooth plot of | |
| # our linear model; this will be particularly noticable when | |
| # we plot higher-order polynomials | |
| # The following line creates a large number of equally spaced points | |
| # between just before the scaled Olympic years start and just after | |
| # they end. This gives us a set of points to use to plot our model | |
| # so that it looks like a smooth curve. | |
| plotx = np.asmatrix(np.linspace(flatx[0]-2, flatx[-1]+2, (flatx[-1]-flatx[0]+4)/0.01 + 1)).conj().transpose() | |
| # The following code generalizes to nth-order polynomial linear models. | |
| # The variable 'model_order' determines the order of the polynomial. | |
| # The default value is set to 1 for a first-order polynomial, i.e., | |
| # a line embedded in 2-dimensions | |
| model_order = 4 | |
| X_columns = model_order + 1 | |
| # Create X (the 'design' matrix) of appropriate size for the input data | |
| X = np.asmatrix(np.zeros((x.shape[0], X_columns))) | |
| # Create the second set of values that will be used as the x values for making | |
| # a smooth plot of our model | |
| plotX = np.asmatrix(np.zeros((plotx.shape[0], X_columns))) | |
| for k in range(X_columns): | |
| # fill in the X matrix with the input values, to the appropriate polynomial power | |
| X[:,k] = np.power(x, k) | |
| # fill in the plotX matrix with the finer-grained x-axis values, | |
| # to the appropriate polynomial power | |
| plotX[:,k] = np.power(plotx, k) | |
| # Here's the key step!: create the matrix version of the normal equation(s) | |
| # The following python functions will be useful: | |
| # np.linalg.inv(X) <-- returns the inverse of X | |
| # X.transpose() <-- transposes X | |
| # X.conj() <-- not required, but this makes your math safe in case | |
| # the input values include complex numbers | |
| # (you do this before transpose) | |
| #ap=np.matrix(np.linalg.inv( np.dot(X.transpose(),X) + N*result*np.identity(trainX[0].size) )) | |
| # w = np.dot(np.dot( ap,X.transpose() ),np.asarray(t))### FILL THIS IN | |
| a = np.dot(X.transpose(),X) | |
| N= t.size | |
| b= np.linalg.inv(a+N*result*np.identity(a[0].size)) | |
| c= np.dot(X.transpose() , np.asarray(t)) | |
| w = np.dot(b,c) | |
| print "w:" | |
| print w | |
| ## plt.close() | |
| # create figure 2 | |
| plt.figure(0) | |
| # plot the input (x,t) data | |
| plt.scatter(np.asarray(x), np.asarray(t), edgecolor = 'b', color = 'w', marker = 'o') | |
| # plot the linear model itself as a smooth line | |
| plt.plot(plotx, plotX*w, color = 'r', linewidth = 2) | |
| # add labels to your figure! | |
| plt.xlabel('Olympic number (note, not year!)') | |
| plt.ylabel('Winning time') | |
| plt.title('Olympic data with best fit model') | |
| plt.show() | |
| # This last line may or may not be necessary to keep your matplotlib window open | |
| raw_input('Press <ENTER> to quit...') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment