Skip to content

Instantly share code, notes, and snippets.

@eob
Created April 22, 2014 02:14
Show Gist options
  • Save eob/11163203 to your computer and use it in GitHub Desktop.
Save eob/11163203 to your computer and use it in GitHub Desktop.
Polynomial Fitting
from scipy.optimize import curve_fit
from collections import Counter, defaultdict
import numpy as np
import time
def f(xss, a, b, c, d, e, f):
"""
Polynomial function up to order 5
"""
ret = []
for x in xss:
y = 0
y = a + b*x + c*x**2 + d*x**3 + e*x**4 + f*x**5
ret.append(y)
return ret
def super_f(xss, coeffs):
ret = []
for x in xss:
y = 0
for (idx, coeff) in enumerate(coeffs):
y += coeff * (x**idx)
ret.append(y)
return ret
def mse(xs, ys):
if len(xs) == 0:
print "HEY! Trying to mse empty list, you fool!"
return 0
ret = 0
for (x,y) in zip(xs, ys):
ret += (x-y)*(x-y)
ret /= len(xs)
def mse_at_order(xs, ys, degree, folds, train_percent):
"""
Returns the average mean squared error
"""
errors = []
for fold in range(len(folds)):
# Todo: shuffle and divide
x_copy = xs[:]
y_copy = ys[:]
shuffle(x_copy)
shuffle(y_copy)
train_percent * len(x_copy)
xs_train = xs
ys_train = ys
xs_test = xs
ys_test = ys
tup = np.polyfit(xs_train, ys_train, degree, full=True)
coeffs = tup[0]
estimated_costs = super_f(xs_test, coeffs)
error = mse(estimated_costs, ys_test)
errors.append(error)
return np.mean(errors)
def learn_polynomial(feature, datapoints, maxpersize=-1):
"""
Given a feature and a set of different _sized_ datapoints
learn the extraction-cost polynomial
Args:
datapoints: list of datapoint objects that have a .size attr
maxpersize: max number of extractions for a given data point size
"""
vals = defaultdict(list)
for datapoint in datapoints:
x = datapoint.size
if maxpersize == -1:
if len(vals[x]) > 10: continue
start = time.time()
feature.extract(datapoint)
stop = time.time()
y = stop - start
vals[x].append(y)
# xs -> sizes, ys -> avg cost for size
xs = np.array(vals.keys())
ys = np.array([np.median(vals[x]) for x in xs])
# try to use lower degrees if possible
cost_per_coeffs = []
deg = 0
while deg < 6:
tup = np.polyfit(xs, ys, deg, full=True)
coeffs = tup[0]
res = tup[1]
cost_per_coeffs.append((sum(res) / (.95 ** deg), coeffs))
if abs(sum(res)) <= 1e-4:
break
deg += 1
# Take the minimum cost list of coeffs.
# Min of a list of tuples uses the first slow.
coeffs = list(min(cost_per_coeffs)[1])
coeffs.reverse()
# Plotting
# -----------------------------------------------------
if False:
allxs, allys = [], []
from util import *
poly = create_polynomial(coeffs)
from matplotlib import pyplot as plt
for key, vs in vals.iteritems():
for v in vs:
allxs.append(key)
allys.append(v)
plt.clf()
plt.scatter(allxs, allys, color='red', alpha=0.5, lw=0)
plt.plot(xs, ys, color='red', alpha=0.4)
plt.plot(xs, map(poly, xs), color='green', alpha=0.4)
plt.show()
import pdb
pdb.set_trace()
return coeffs
# ignoreme
#popt, pcov = curve_fit(f, xs, ys)
#return map(float, popt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment