Created
February 27, 2025 23:57
-
-
Save tado/ae4e1886874ec21d3c1c70b8fc6b558a to your computer and use it in GitHub Desktop.
Plot an Iris petal scatter plot with a regression line in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
from matplotlib.patches import Ellipse | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import r2_score | |
IRIS_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
IRIS_COLUMNS = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]


def _covariance_ellipse(x_vals, y_vals, n_std=2.0, **patch_kwargs):
    """Return an Ellipse patch spanning ``n_std`` standard deviations of the points.

    The ellipse is centred on the sample mean and its axes follow the
    eigenvectors of the 2x2 sample covariance matrix of ``x_vals``/``y_vals``.
    Extra keyword arguments are forwarded to ``matplotlib.patches.Ellipse``.
    """
    cov = np.cov(x_vals, y_vals)
    eigvals, eigvecs = np.linalg.eigh(cov)
    # Round-off can push a near-zero eigenvalue slightly negative, which
    # would turn the corresponding axis length into NaN under sqrt.
    eigvals = np.maximum(eigvals, 0.0)

    # The ellipse "width" is measured along the first eigenvector, so the
    # rotation angle is that vector's direction relative to the x axis.
    first_axis = eigvecs[:, 0]
    angle = np.degrees(np.arctan2(first_axis[1], first_axis[0]))

    # Full axis length = n_std standard deviations on each side of the mean.
    width, height = 2 * n_std * np.sqrt(eigvals)
    return Ellipse(
        (x_vals.mean(), y_vals.mean()),
        width=width,
        height=height,
        angle=angle,
        **patch_kwargs,
    )


def plot_iris_petal_scatter_with_regression(*, url=IRIS_URL, n_std=2.0):
    """Plot Iris petal width against petal length with a regression line.

    Downloads the Iris CSV from ``url``, fits a linear regression of petal
    width on petal length over all samples, prints the R-squared value, and
    shows a scatter plot coloured by class. Each class is overlaid with a
    half-transparent covariance ellipse in the same colour as its points.

    Args:
        url: Location of the header-less Iris CSV file.
        n_std: Half-size of each class ellipse, in standard deviations.

    Returns:
        None. If the data cannot be downloaded or parsed, a message is
        printed and nothing is plotted.
    """
    # Only the download is guarded: errors in fitting or plotting are real
    # bugs and should surface instead of being reported as network trouble.
    try:
        df = pd.read_csv(url, names=IRIS_COLUMNS)
    except (OSError, ValueError) as e:
        # URL/HTTP failures derive from OSError; pandas parse errors from ValueError.
        print(f"An error occurred: {e}")
        print("Please check your internet connection or the data download.")
        return

    # Fit petal_width ~ petal_length on the whole data set.
    X = df[["petal_length"]]
    y = df["petal_width"]
    model = LinearRegression()
    model.fit(X, y)
    r2 = r2_score(y, model.predict(X))
    print(f"R-squared (R^2): {r2}")

    # Two end points are enough to draw the fitted straight line.
    x_range = pd.DataFrame(
        {"petal_length": [df["petal_length"].min(), df["petal_length"].max()]}
    )
    y_range = model.predict(x_range)

    # One explicit class -> colour mapping shared by the points and the
    # ellipses, so both always match regardless of the active colour cycle.
    unique_classes = df["class"].unique()
    palette = sns.color_palette("deep", n_colors=len(unique_classes))
    color_map = dict(zip(unique_classes, palette))

    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        data=df,
        x="petal_length",
        y="petal_width",
        hue="class",
        palette=color_map,
        s=100,
    )
    plt.plot(
        x_range["petal_length"],
        y_range,
        color="red",
        linewidth=2,
        label="Regression Line",
    )

    # Overlay a half-transparent filled ellipse for each class.
    ax = plt.gca()
    for cls, color in color_map.items():
        sub_df = df[df["class"] == cls]
        ellipse = _covariance_ellipse(
            sub_df["petal_length"].values,
            sub_df["petal_width"].values,
            n_std=n_std,
            facecolor=color,
            alpha=0.3,
            edgecolor=None,
            label=f"{cls} region",
        )
        ax.add_patch(ellipse)

    plt.title("Iris Petal Scatter Plot by Class with Regression and Regions")
    plt.xlabel("Petal Length")
    plt.ylabel("Petal Width")
    plt.legend(title="Class")
    plt.grid(True)
    plt.show()
# Create and show the plot only when this file is executed as a script, so
# importing it does not trigger a download or open a blocking plot window.
if __name__ == "__main__":
    plot_iris_petal_scatter_with_regression()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment