Skip to content

Instantly share code, notes, and snippets.

@tado
Created February 27, 2025 23:57
Show Gist options
  • Save tado/ae4e1886874ec21d3c1c70b8fc6b558a to your computer and use it in GitHub Desktop.
Save tado/ae4e1886874ec21d3c1c70b8fc6b558a to your computer and use it in GitHub Desktop.
Plot an Iris petal scatter plot with a regression line in Python
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.patches import Ellipse
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
def _load_iris_dataframe():
    """Download the Iris data file and return it as a DataFrame with named columns."""
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    column_names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
    # Passing names= makes pandas treat the first row as data, not as a header.
    return pd.read_csv(url, names=column_names)


def _covariance_ellipse(x_vals, y_vals):
    """Return ``(center, width, height, angle)`` for a covariance ellipse.

    ``width`` and ``height`` are full axis lengths of four standard deviations
    (two either side of the mean) along the principal axes of the sample
    covariance; ``angle`` is the rotation of the width axis in degrees.
    Returns ``None`` when fewer than two points are given, because the sample
    covariance is undefined in that case.
    """
    if len(x_vals) < 2:
        return None

    cov = np.cov(x_vals, y_vals)
    # eigh returns eigenvalues in ascending order, with the matching
    # eigenvectors as columns, so column 0 pairs with eigvals[0] (the width).
    eigvals, eigvecs = np.linalg.eigh(cov)
    # Rounding can push a zero eigenvalue slightly negative, which would make
    # the square root below NaN.
    eigvals = np.clip(eigvals, 0.0, None)

    axis_x, axis_y = eigvecs[:, 0]
    angle = np.degrees(np.arctan2(axis_y, axis_x))
    width, height = 4 * np.sqrt(eigvals)
    return (x_vals.mean(), y_vals.mean()), width, height, angle


def _draw_petal_figure(df):
    """Fit the regression, print its R-squared and show the annotated scatter plot."""
    X = df[["petal_length"]]
    y = df["petal_width"]

    model = LinearRegression()
    model.fit(X, y)
    r2 = r2_score(y, model.predict(X))
    print(f"R-squared (R^2): {r2}")

    # Two end points are enough to draw a straight line. Keeping them in a
    # DataFrame with the training column name matches what the model was fit on.
    x_range = pd.DataFrame({
        "petal_length": [X["petal_length"].min(), X["petal_length"].max()]
    })
    y_range = model.predict(x_range)

    # One colour per class, shared by the points and the ellipses so that a
    # class's region always matches its markers.
    unique_classes = df["class"].unique()
    palette = sns.color_palette("deep", n_colors=len(unique_classes))
    color_map = dict(zip(unique_classes, palette))

    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        data=df,
        x="petal_length",
        y="petal_width",
        hue="class",
        palette=color_map,
        s=100,
    )
    plt.plot(
        x_range["petal_length"],
        y_range,
        color="red",
        linewidth=2,
        label="Regression Line",
    )

    # Overlay a half-transparent filled ellipse for each class.
    ax = plt.gca()
    for cls in unique_classes:
        sub_df = df[df["class"] == cls]
        params = _covariance_ellipse(
            sub_df["petal_length"].to_numpy(),
            sub_df["petal_width"].to_numpy(),
        )
        if params is None:
            # Not enough points in this class to estimate a spread.
            continue
        center, width, height, angle = params
        ax.add_patch(
            Ellipse(
                center,
                width=width,
                height=height,
                angle=angle,
                facecolor=color_map[cls],
                alpha=0.3,
                edgecolor=None,
                label=f"{cls} region",
            )
        )

    plt.title("Iris Petal Scatter Plot by Class with Regression and Regions")
    plt.xlabel("Petal Length")
    plt.ylabel("Petal Width")
    plt.legend(title="Class")
    plt.grid(True)
    plt.show()


def plot_iris_petal_scatter_with_regression():
    """Plot Iris petal width against petal length with a fit and class regions.

    Downloads the Iris dataset from the UCI Machine Learning Repository, fits
    a linear regression of petal width on petal length, prints the R-squared
    of that fit, and shows a scatter plot coloured by class. The regression
    line and one translucent covariance ellipse per class are overlaid.

    Takes no arguments and returns ``None``. Errors are reported on stdout
    rather than raised: a failed download prints the error plus a hint to
    check the connection, and a later failure prints only the error.
    """
    try:
        df = _load_iris_dataframe()
    except Exception as e:
        # Boundary handler: network, HTTP and parse failures arrive as
        # unrelated exception types, so they are all reported the same way.
        print(f"An error occurred: {e}")
        print("Please check your internet connection or the data download.")
        return

    try:
        _draw_petal_figure(df)
    except Exception as e:
        # Still never raises to the caller, but the network is not blamed for
        # a problem that happened after the data was already loaded.
        print(f"An error occurred: {e}")
# Create and show the plot only when this file is executed as a script, so
# that importing it does not trigger a download or open a plot window.
if __name__ == "__main__":
    plot_iris_petal_scatter_with_regression()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment