[Python] Fitting plane/surface to a set of data points
by @amroamroamro (last active August 12, 2024)

Python version of the MATLAB code in this Stack Overflow post: https://stackoverflow.com/a/18648210/97160

The example shows how to determine the best-fit plane/surface (1st or higher order polynomial) over a set of three-dimensional points.

Implemented in Python + NumPy + SciPy + matplotlib.
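For reference, the first-order fit is an ordinary linear least-squares problem: each point contributes one row of a design matrix, and the plane coefficients minimize the squared vertical residuals:

\min_{C}\ \lVert A C - z \rVert_2^2, \qquad
A = \begin{bmatrix} x_1 & y_1 & 1 \\ \vdots & \vdots & \vdots \\ x_n & y_n & 1 \end{bmatrix}, \qquad
z = \begin{bmatrix} z_1 \\ \vdots \\ z_n \end{bmatrix}

so that z ≈ C[0]*x + C[1]*y + C[2]. Higher orders only append columns to A (e.g. x*y, x^2, y^2 for the quadratic case); the solve itself stays linear.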

[figure: quadratic_surface]


EDIT (2023-06-16)

I added a new example, fit.py, that shows polynomial fitting of any n-th order, as well as the same thing using scikit-learn functions in fit-sklearn.py.

[figure: peaks]

#!/usr/bin/env python
import numpy as np
import scipy.linalg
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# some 3-dim points
mean = np.array([0.0, 0.0, 0.0])
cov = np.array([[1.0, -0.5, 0.8], [-0.5, 1.1, 0.0], [0.8, 0.0, 1.0]])
data = np.random.multivariate_normal(mean, cov, 50)

# regular grid covering the domain of the data
X, Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 1  # 1: linear, 2: quadratic
if order == 1:
    # best-fit linear plane: z = C[0]*x + C[1]*y + C[2]
    A = np.c_[data[:,0], data[:,1], np.ones(data.shape[0])]
    C, _, _, _ = scipy.linalg.lstsq(A, data[:,2])  # coefficients
    # evaluate it on the grid
    Z = C[0]*X + C[1]*Y + C[2]
    # or expressed as a matrix/vector product
    #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape)
elif order == 2:
    # best-fit quadratic surface: z = C[0] + C[1]*x + C[2]*y + C[3]*x*y + C[4]*x^2 + C[5]*y^2
    A = np.c_[np.ones(data.shape[0]), data[:,:2], np.prod(data[:,:2], axis=1), data[:,:2]**2]
    C, _, _, _ = scipy.linalg.lstsq(A, data[:,2])
    # evaluate it on the grid
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)

# plot points and fitted surface
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in matplotlib 3.6
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()
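Once the coefficients are in hand, the fitted model can be evaluated anywhere; a minimal follow-up sketch for the order == 1 branch above (it assumes C and data from that script, and the query points are made up):

# evaluate the fitted plane z = C[0]*x + C[1]*y + C[2] at new (x, y) points
pts = np.array([[0.5, -1.0], [1.5, 2.0]])
z_hat = C[0]*pts[:, 0] + C[1]*pts[:, 1] + C[2]
print(z_hat)

# the RMS of the vertical residuals measures the quality of the fit
resid = data[:, 2] - (C[0]*data[:, 0] + C[1]*data[:, 1] + C[2])
print('RMS residual:', np.sqrt(np.mean(resid**2)))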
fit-sklearn.py:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to the peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1,1), Y.reshape(-1,1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def names2model(names):
    # C[i] * X^n * Y^m
    return ' + '.join([
        f"C[{i}]*{n.replace(' ','*')}"
        for i, n in enumerate(names)])

# generate 3-dim sample points (peaks surface)
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# best-fit polynomial surface
model = make_pipeline(
    PolynomialFeatures(degree=order),
    LinearRegression(fit_intercept=False))
model.fit(np.c_[X, Y], Z)

m = names2model(model[0].get_feature_names_out(['X', 'Y']))
C = model[1].coef_.T  # coefficients
r2 = model.score(np.c_[X, Y], Z)  # R-squared

# print summary
print(f'data = {Z.size}x3')
print(f'model = {m}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
ZZ = model.predict(np.c_[XX.flatten(), YY.flatten()]).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
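To see which polynomial term each coefficient belongs to, the fitted pipeline can be queried directly; a small sketch, assuming the model fitted above (the query point is made up):

# pair each fitted coefficient with the term sklearn generated for it
for name, coef in zip(model[0].get_feature_names_out(['X', 'Y']),
                      model[1].coef_.ravel()):
    print(f'{name:>10s}: {coef:+.4g}')

# predict z at a single new (x, y) point
print(model.predict([[0.5, -1.0]]))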
fit.py:

import numpy as np
from scipy.linalg import lstsq
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to the peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1,1), Y.reshape(-1,1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def exp2model(e):
    # C[i] * X^n * Y^m
    return ' + '.join([
        f'C[{i}]' +
        ('*' if x > 0 or y > 0 else '') +
        (f'X^{x}' if x > 1 else 'X' if x == 1 else '') +
        ('*' if x > 0 and y > 0 else '') +
        (f'Y^{y}' if y > 1 else 'Y' if y == 1 else '')
        for i, (x, y) in enumerate(e)
    ])

# generate 3-dim sample points (peaks surface)
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# calculate exponents of design matrix
#e = [(x,y) for x in range(0,order+1) for y in range(0,order-x+1)]
e = [(x,y) for n in range(0,order+1) for y in range(0,n+1) for x in range(0,n+1) if x+y==n]
eX = np.asarray([[x] for x,_ in e]).T
eY = np.asarray([[y] for _,y in e]).T

# best-fit polynomial surface
A = (X ** eX) * (Y ** eY)
C, resid, _, _ = lstsq(A, Z)  # coefficients

# calculate R-squared from residual error
r2 = 1 - resid[0] / (Z.size * Z.var())

# print summary
print(f'data = {Z.size}x3')
print(f'model = {exp2model(e)}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
A = (XX.reshape(-1,1) ** eX) * (YY.reshape(-1,1) ** eY)
ZZ = np.dot(A, C).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
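The design matrix above is built with a broadcasting trick: X has shape (n², 1) and eX shape (1, m), so X ** eX has shape (n², m) with one monomial per column. A quick sanity check in the same context, using order = 2 for readability, where the exponent pairs come out as (0,0), (1,0), (0,1), (2,0), (1,1), (0,2):

# verify the broadcast construction of A on a tiny order-2 example
e2 = [(x, y) for n in range(0, 3) for y in range(0, n+1) for x in range(0, n+1) if x + y == n]
print(e2)  # [(0, 0), (1, 0), (0, 1), (2, 0), (1, 1), (0, 2)]

Xc = np.array([[2.0], [3.0]])         # (N, 1) column of x values
Yc = np.array([[5.0], [7.0]])         # (N, 1) column of y values
eXc = np.array([[x for x, _ in e2]])  # (1, M) exponents of x
eYc = np.array([[y for _, y in e2]])  # (1, M) exponents of y
A2 = (Xc ** eXc) * (Yc ** eYc)        # (N, M): [1, x, y, x^2, x*y, y^2]
print(A2[0])  # [ 1.  2.  5.  4. 10. 25.]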
@lukaszprawdziwy commented Sep 16, 2021

Hello,
Thank you for the handy code.
Is there a way to fit a surface with an exponential function? I am thinking of something like:

c[0] + c[1]*x + c[2]*y + c[3]*np.exp(c[4]*x) + c[5]*np.exp(c[6]*y)

How would I feed that to scipy.linalg.lstsq?

Thanks
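A note on that question: the model is nonlinear in c[4] and c[6], so it cannot be written as a single matrix product A @ c for scipy.linalg.lstsq; scipy.optimize.curve_fit is the usual tool instead. A minimal sketch, assuming data shaped (n, 3) as in the scripts above (the starting guess p0 is made up and matters for convergence):

import numpy as np
from scipy.optimize import curve_fit

# z = c0 + c1*x + c2*y + c3*exp(c4*x) + c5*exp(c6*y)
def f(xy, c0, c1, c2, c3, c4, c5, c6):
    x, y = xy
    return c0 + c1*x + c2*y + c3*np.exp(c4*x) + c5*np.exp(c6*y)

p0 = [0, 1, 1, 1, 0.1, 1, 0.1]  # hypothetical initial guess
C, _ = curve_fit(f, (data[:, 0], data[:, 1]), data[:, 2], p0=p0, maxfev=10000)
print(C)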

@Mozzaurel

Thank you @alexlib for the cubic version.
There is one typo in it: the last column of A should be data[:,1]**3, not data[:,2]**3.
With that fix it works fine!

elif order == 3:
    # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]
    A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \
              data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \
              np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,1]**3]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape)
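For cross-checking, the same 10-column cubic design matrix can also be generated programmatically rather than written out by hand; a sketch, reusing data from the original script, with the column order matching the MATLAB comment above:

# columns: [1, x, y, x^2, x*y, y^2, x^3, x^2*y, x*y^2, y^3]
order = 3
A = np.column_stack([data[:, 0]**i * data[:, 1]**(n - i)
                     for n in range(order + 1)
                     for i in range(n, -1, -1)])
C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])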


@jeffreyi03

@alexlib

I just used your code to get the cubic order. Just to be sure: when I print out C, I get 10 values. What is the order of the coefficients, and what does the equation look like? I don't want to get this wrong.

Thank you and good job!

@jeffreyi03

@mirgilani Did you figure this out? Can you tell me the correct order of coefficients?

@alexlib commented Jun 22, 2022

[ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]

@jeffreyi03 commented Jun 22, 2022 via email

@alexlib commented Jun 22, 2022

> Just to make sure I understand: C[0] + C[1]*x + C[2]*y + C[3]*x^2 + C[4]*x*y ... ?

Yes
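Written out in full, with the column order above, the cubic model is:

z = C[0] + C[1]*x + C[2]*y + C[3]*x^2 + C[4]*x*y + C[5]*y^2 + C[6]*x^3 + C[7]*x^2*y + C[8]*x*y^2 + C[9]*y^3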

@jensleitloff commented May 10, 2023

You can easily use any polynomial order without implementing it explicitly, using sklearn.preprocessing.PolynomialFeatures and sklearn.linear_model.LinearRegression:
https://stackoverflow.com/a/71214371
The results are identical, and you can also make the fit robust by simply swapping LinearRegression for a RANSAC regressor, as sketched below.
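A minimal sketch of that swap (assuming data, X, XX, YY from the first script above; sklearn's RANSACRegressor wraps LinearRegression as its base estimator by default):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import RANSACRegressor
from sklearn.pipeline import make_pipeline

# RANSAC refits the base model on random subsets and keeps the consensus
# set, so gross outliers in z do not pull the fitted surface
model = make_pipeline(PolynomialFeatures(degree=2),
                      RANSACRegressor(random_state=0))
model.fit(data[:, :2], data[:, 2])
Z = model.predict(np.c_[XX, YY]).reshape(X.shape)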

@ditia298

Thank you for the excellent Python solution for fitting a surface. I just wanted to know, if I may: what if we have weights on the data points? For instance, (x1, y1) comes with weight w1, (x2, y2) with w2, etc. I need to take the weights into account while fitting the surface. Could you please enlighten me on how I can achieve that?

@jensleitloff commented Jun 16, 2023

This is possible using PolynomialFeatures (as intended) for preprocessing, followed by e.g. LinearRegression, Ridge, HuberRegressor, or any other regressor that supports sample weights. Thus it is a two-step process, as already shown in the Stack Overflow link I posted earlier: https://stackoverflow.com/a/71214371
First, you generate all polynomial features of your input points (x1, x2 ...) using PolynomialFeatures. The input points can be multidimensional, e.g. 2-dimensional for 3-dimensional fitting (see the following example). Afterwards, you fit your polynomial features to (y1, y2 ...) with the corresponding weights (w1, w2 ...), e.g. with LinearRegression.
The best way to perform this two-step process is to use make_pipeline. A nice example is given in the scikit-learn demo "Robust linear estimator fitting".
Using the example data from a previous post of @amroamroamro, the complete example would look like this:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# your 3-dim data
x = [13.7, 14.3, 14.8, 15.3]
y = [4436.95646, 4311.55363, 4299.82329, 4164.97164]
z = [4774.21409, 4688.34413, 4687.26375, 4587.78392]
data = np.c_[x,y,z]
# weights
w = [1, 2, 1, 0.5]
# e.g. quadratic polynomial
order = 2

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()

# fitting with make_pipeline
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
ZZ = model.predict(np.c_[XX, YY])

# fitting without make_pipeline
poly = PolynomialFeatures(degree=order)
data_poly = poly.fit_transform(data[:, :2])
model_poly = LinearRegression()
model_poly.fit(data_poly, data[:, -1], sample_weight=w)
ZZ_poly = model_poly.predict(poly.transform(np.c_[XX, YY]))

assert np.allclose(ZZ, ZZ_poly)  # identical up to floating-point round-off

You can use the models to estimate any z-value for given (error-free) x,y-values.
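For example (query values chosen inside the fitted domain):

print(model.predict([[14.0, 4300.0]]))  # estimated z at x=14.0, y=4300.0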

@jensleitloff

Here is the original example with (random) weights and the above procedure:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

USE_WEIGHTS = True

# some 3-dim points
mean = np.array([0.0,0.0,0.0])
cov = np.array([[1.0,-0.5,0.8], [-0.5,1.1,0.0], [0.8,0.0,1.0]])
data = np.random.multivariate_normal(mean, cov, 50)
if USE_WEIGHTS:
    # weights can't be negative
    w = np.abs(np.random.normal(loc=1, scale=1, size=50))
else:
    w = np.ones(shape=50)

# regular grid covering the domain of the data
X,Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 2    # 1: linear, 2: quadratic
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
Z = model.predict(np.c_[XX, YY]).reshape(X.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()

@amroamroamro (author)

@jensleitloff see the new update fit.py (and the same thing in fit-sklearn.py using sklearn functions)

@GPXue commented Aug 12, 2024

Quoting MSchmidt99's earlier reply to @madgrizzle:

> I took the time to linearize this for up to the n-th degree over the afternoon. You will want to make sure that all your data has a 64-bit datatype (e.g. np.int64(x) or np.float64(z)), because at higher-degree calculations things will begin to break once overflow warnings come up. This is what I threw together:

# functions
# (this fragment assumes numpy as np, scipy.linalg, the data arrays
#  x_data / y_data / coord, and a 3D axes `ax` are already defined)

# in: [meshgrid], [meshgrid], [np list of coordinate pair np lists. ex: [[x1,y1,z1], [x2,y2,z2], etc.] ], [degree]
# out: [Z]
def curve(X, Y, coord, n):
    XX = X.flatten()
    YY = Y.flatten()

    # best-fit curve
    A = XYchooseN(coord[:,0], coord[:,1], n)
    C,_,_,_ = scipy.linalg.lstsq(A, coord[:,2])
    print("Calculating C in curve() done")
    # evaluate it on a grid
    Z = np.dot(XYchooseN(XX, YY, n), C).reshape(X.shape)
    return Z

# in: [array], [array], [int]
# out: the terms x^(n-k) * y^k, k=0..n, of (x+y)^n (binomial coefficients ignored)
def XYchooseN(x,y,n):
    XYchooseN = []
    n = n+1
    for j in range(len(x)):
        I = x[j]
        J = y[j]
        matrix = []
        Is = []
        Js = []
        for i in range(0,n):
            Is.append(I**i)
            Js.append(J**i)
            matrix.append(np.concatenate((np.ones(n-i),np.zeros(i))))
        Is = np.array(Is)
        Js = np.array(Js)[np.newaxis]
        IsJs0s = matrix * Is * Js.T
        IsJs = []
        for i in range(0,n):
            IsJs = np.concatenate((IsJs,IsJs0s[i,:n-i]))
        XYchooseN.append(IsJs)
    return np.array(XYchooseN)

X,Y = np.meshgrid(x_data,y_data) # surface meshgrid

Z = curve(X, Y, coord, 3)

# todo: cmap by avg from (x1,y1) to (x2,y2) of |Z height - scatter height|

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.5, color='r')

Thanks MSchmidt99!
That's good. I added a bit so that users can run this code directly, as follows:
import numpy as np
from scipy.linalg import lstsq
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def generateData(n=30):
    # similar to the peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1,1), Y.reshape(-1,1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    data_lt = []
    for ee in range(len(X)):
        data_lt.append([float(X[ee, 0]), float(Y[ee, 0]), float(Z[ee, 0])])
    return np.array(data_lt)

# in: [meshgrid], [meshgrid], [np list of coordinate pairs, ex: [[x1,y1,z1], [x2,y2,z2], ...] ], [degree]
# out: [Z]
def curve(X, Y, coord, n):
    XX = X.flatten()
    YY = Y.flatten()
    # best-fit curve
    A = XYchooseN(coord[:,0], coord[:,1], n)
    C,_,_,_ = lstsq(A, coord[:,2])
    print("Calculating C in curve() done")
    # evaluate it on a grid
    Z = np.dot(XYchooseN(XX, YY, n), C).reshape(X.shape)
    return Z

# in: [array], [array], [int]
# out: the terms x^(n-k) * y^k, k=0..n, of (x+y)^n (binomial coefficients ignored)
def XYchooseN(x, y, n):
    XYchooseN = []
    n = n + 1
    for j in range(len(x)):
        I = x[j]
        J = y[j]
        matrix = []
        Is = []
        Js = []
        for i in range(0, n):
            Is.append(I**i)
            Js.append(J**i)
            matrix.append(np.concatenate((np.ones(n-i), np.zeros(i))))
        Is = np.array(Is)
        Js = np.array(Js)[np.newaxis]
        IsJs0s = matrix * Is * Js.T
        IsJs = []
        for i in range(0, n):
            IsJs = np.concatenate((IsJs, IsJs0s[i,:n-i]))
        XYchooseN.append(IsJs)
    return np.array(XYchooseN)

data = generateData()

# the generated 3-dim points
x_data, y_data, z_data = data[:,0], data[:,1], data[:,2]

X, Y = np.meshgrid(x_data, y_data)  # surface meshgrid

coord = data

Z = curve(X, Y, coord, 3)

# todo: cmap by avg from (x1,y1) to (x2,y2) of |Z height - scatter height|

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')  # create a 3D axis

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.5, color='r')
ax.scatter(data[:,0], data[:,1], data[:,2], c='green', s=50)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()
