Skip to content

Instantly share code, notes, and snippets.

@suminb
Created December 21, 2012 03:51
Show Gist options
  • Select an option

  • Save suminb/4350568 to your computer and use it in GitHub Desktop.

Select an option

Save suminb/4350568 to your computer and use it in GitHub Desktop.
## fitpoly_incomplete.Py
# Partial script to help get started in ISTA 421/521 Homework 2
#import sys
#sys.path.reverse()
import numpy as np
import matplotlib.pyplot as plt
#from scipy.optimize import minimize
from scipy.optimize import minimize_scalar
def K_fordCV(Lambda):
male100 = np.asmatrix(np.genfromtxt('/Users/sumin/dev/masters/ista521/homework2/data/synthdata.csv', delimiter = ' ', dtype = None))
x = male100[:, 0] # Olympic years
t = male100[:, 1] # Winning times
maxorder=4
N=x.size
K=10
testx = np.linspace(x[0,0], x[x.size-1,0], 1001) # Large, independent test set
testt = 5*testx**3 - testx**2 + testx + 150*np.random.randn(testx.shape[0], 1)
# convert everything to matrices
x = np.asmatrix(x)
t = np.asmatrix(t)
testx = np.asmatrix(testx).conj().transpose()
testt = np.asmatrix(testt).conj().transpose()
X = np.asmatrix(np.zeros(shape = (x.shape[0], maxorder + 1)))
testX = np.asmatrix(np.zeros(shape = (testx.shape[0], maxorder + 1)))
sizes = (N/K)*np.ones(K)
csizes = np.zeros(sizes.shape[0] + 1)
csizes[1:csizes.shape[0]] = sizes.cumsum()
cv_loss = np.zeros((K, maxorder + 1))
for k in range(maxorder + 1):
# print k
X[:, k] = np.power(x, k)
# print X
testX[:, k] = np.power(testx, k)
# print testX
# print "start"
for fold in range(K):
# Partition the data
# foldX contains the data for just one fold
# trainX contains all other data
foldX = X[csizes[fold]+1:csizes[fold+1]+1,0:k+1]
trainX = np.copy(X[:, 0:k+1])
trainX = np.asmatrix(np.delete(trainX, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
foldt = t[csizes[fold]+1:csizes[fold+1]+1,:]
traint = np.copy(t)
traint = np.asmatrix(np.delete(traint, np.arange(csizes[fold]+1, csizes[fold+1]+1), 0))
# print "p"
# print trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size)
# print "a"
# print trainX.conj().transpose()*trainX
# print "b"
# print N*Lambda*np.identity(trainX[0].size)
# print trainX.conj().transpose()*trainX+N*Lambda
# print "c"
# print trainX[0].size
# print k
# print "d"
# print trainX.conj().transpose()
# w = np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda)*trainX.conj().transpose()*traint
w =np.linalg.inv(trainX.conj().transpose()*trainX+N*Lambda*np.identity(trainX[0].size))*trainX.conj().transpose()*traint
fold_pred = foldX*w
cv_loss[fold,k] = np.mean(np.power(fold_pred-foldt, 2))
# print cv_loss[fold,k]
# print np.log(cv_loss)
a=np.mean(np.log(cv_loss),0)
#print a
return a[k]
f = K_fordCV
res = minimize_scalar(f, method='brent')
print res
result= res.x
# print result
male100 = np.asmatrix(np.genfromtxt('../data/olympics.csv', delimiter = ' ', dtype = None))
x = male100[:, 0] # Olympic years
t = male100[:, 1] # Winning times
# The following rescales x for numerical reasons
# (In hw2 you are asked to fit 4th order polynomials, which involves
# taking the 4th power of the input x values; taking the 4th power
# of years gets large!)
x = x - x[0]
x = x/4
# x (and t) is currently a column of a matrix object
# the following 'flattens' x into an array we can iterate over
flatx = np.asarray(x).flatten()
################################################################
## Linear model
# Note: You will notice in the following code that there appear
# two be "two versions" of the input x values that we range
# over: (1) x itself (the scaled olympic years) and
# (2) the pair of variables plotx and plotX.
# The plotx and plotX are an approximation of running "continuously"
# over the x-axis so that we can get a nice, smooth plot of
# our linear model; this will be particularly noticable when
# we plot higher-order polynomials
# The following line creates a large number of equally spaced points
# between just before the scaled Olympic years start and just after
# they end. This gives us a set of points to use to plot our model
# so that it looks like a smooth curve.
plotx = np.asmatrix(np.linspace(flatx[0]-2, flatx[-1]+2, (flatx[-1]-flatx[0]+4)/0.01 + 1)).conj().transpose()
# The following code generalizes to nth-order polynomial linear models.
# The variable 'model_order' determines the order of the polynomial.
# The default value is set to 1 for a first-order polynomial, i.e.,
# a line embedded in 2-dimensions
model_order = 4
X_columns = model_order + 1
# Create X (the 'design' matrix) of appropriate size for the input data
X = np.asmatrix(np.zeros((x.shape[0], X_columns)))
# Create the second set of values that will be used as the x values for making
# a smooth plot of our model
plotX = np.asmatrix(np.zeros((plotx.shape[0], X_columns)))
for k in range(X_columns):
# fill in the X matrix with the input values, to the appropriate polynomial power
X[:,k] = np.power(x, k)
# fill in the plotX matrix with the finer-grained x-axis values,
# to the appropriate polynomial power
plotX[:,k] = np.power(plotx, k)
# Here's the key step!: create the matrix version of the normal equation(s)
# The following python functions will be useful:
# np.linalg.inv(X) <-- returns the inverse of X
# X.transpose() <-- transposes X
# X.conj() <-- not required, but this makes your math safe in case
# the input values include complex numbers
# (you do this before transpose)
#ap=np.matrix(np.linalg.inv( np.dot(X.transpose(),X) + N*result*np.identity(trainX[0].size) ))
# w = np.dot(np.dot( ap,X.transpose() ),np.asarray(t))### FILL THIS IN
a = np.dot(X.transpose(),X)
N= t.size
b= np.linalg.inv(a+N*result*np.identity(a[0].size))
c= np.dot(X.transpose() , np.asarray(t))
w = np.dot(b,c)
print "w:"
print w
## plt.close()
# create figure 2
plt.figure(0)
# plot the input (x,t) data
plt.scatter(np.asarray(x), np.asarray(t), edgecolor = 'b', color = 'w', marker = 'o')
# plot the linear model itself as a smooth line
plt.plot(plotx, plotX*w, color = 'r', linewidth = 2)
# add labels to your figure!
plt.xlabel('Olympic number (note, not year!)')
plt.ylabel('Winning time')
plt.title('Olympic data with best fit model')
plt.show()
# This last line may or may not be necessary to keep your matplotlib window open
raw_input('Press <ENTER> to quit...')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment