Last active
February 17, 2026 16:27
-
-
Save cavedave/483414de03fa90915449d78a207ce053 to your computer and use it in GitHub Desktop.
US wedding costs by state. Data from https://www.markbroumand.com/pages/research-wedding-cost-and-marriage-length. An interesting "diamonds are forever" paper goes into more individual-level data: https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2501480
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| State | Average Wedding Cost | Average Marriage Length | |
|---|---|---|---|
| Alabama | 34000 | 20.3 | |
| Arizona | 32000 | 20.6 | |
| Arkansas | 25000 | 19.4 | |
| California | 41000 | 19.9 | |
| Colorado | 34000 | 17.9 | |
| Connecticut | 44000 | 21.4 | |
| Delaware | 39000 | 21.9 | |
| DC | 42000 | 10.8 | |
| Florida | 34000 | 20.3 | |
| Georgia | 30000 | 18.7 | |
| Idaho | 20000 | 19.8 | |
| Illinois | 39000 | 21.1 | |
| Indiana | 26000 | 20 | |
| Iowa | 24000 | 21.3 | |
| Kansas | 25000 | 20.2 | |
| Kentucky | 20000 | 19.3 | |
| Louisiana | 37000 | 20.2 | |
| Maine | 44000 | 22.4 | |
| Maryland | 39000 | 19.9 | |
| Massachusetts | 42000 | 20.7 | |
| Michigan | 29000 | 22.1 | |
| Minnesota | 31000 | 21.3 | |
| Mississippi | 33000 | 19.6 | |
| Missouri | 27000 | 20.3 | |
| Montana | 20000 | 20.8 | |
| Nebraska | 22000 | 20.8 | |
| Nevada | 21000 | 18.9 | |
| New Hampshire | 44000 | 22.6 | |
| New Jersey | 55000 | 20.5 | |
| New Mexico | 26000 | 20.3 | |
| New York | 49000 | 20.9 | |
| North Carolina | 31000 | 19.6 | |
| North Dakota | 22000 | 19.9 | |
| Ohio | 30000 | 21.1 | |
| Oklahoma | 25000 | 17.9 | |
| Oregon | 30000 | 19.7 | |
| Pennsylvania | 38000 | 22 | |
| Rhode Island | 44000 | 20.3 | |
| South Carolina | 39000 | 20.6 | |
| South Dakota | 23000 | 20.9 | |
| Tennessee | 28000 | 19.3 | |
| Texas | 32000 | 17.6 | |
| Utah | 17000 | 18 | |
| Vermont | 44000 | 23.1 | |
| Virginia | 38000 | 19.8 | |
| Washington | 30000 | 18.2 | |
| West Virginia | 36000 | 21.5 | |
| Wisconsin | 29000 | 22.7 | |
| Wyoming | 26000 | 19.9 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scatter plot of average wedding cost vs. average marriage length by US state.

Reads the per-state CSV, fits a straight trend line, prints the correlation
coefficient inside the axes, and labels the extreme states (plus DC).
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the table and switch to snake_case column names for the rest of the script.
df = pd.read_csv("/content/data-FdN4y.csv")
column_names = {
    "State": "state",
    "Average Wedding Cost": "wedding_cost_usd",
    "Average Marriage Length": "marriage_length_years",
}
plot_df = df.rename(columns=column_names).copy()

cost = plot_df["wedding_cost_usd"].to_numpy()
years = plot_df["marriage_length_years"].to_numpy()

# Summary statistics: correlation coefficient and a degree-1 least-squares fit.
corr = np.corrcoef(cost, years)[0, 1]
slope, intercept = np.polyfit(cost, years, 1)

# Only the extremes get a text label: top and bottom three states on each axis.
label_states = set()
for column in ("wedding_cost_usd", "marriage_length_years"):
    label_states |= set(plot_df.nlargest(3, column)["state"])
    label_states |= set(plot_df.nsmallest(3, column)["state"])
# DC is always labelled when it is present in the data.
if plot_df["state"].eq("DC").any():
    label_states.add("DC")

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(cost, years)

# Trend line drawn across the observed cost range.
grid = np.linspace(cost.min(), cost.max(), 200)
ax.plot(grid, slope * grid + intercept)

ax.set_xlabel("Average wedding cost $")
ax.set_ylabel("Average marriage length in years")
ax.set_title("Wedding cost vs. marriage length (by US state)")

# Correlation note in the top-left corner, positioned in axes coordinates.
ax.text(
    0.02,
    0.98,
    f"Correlation r = {corr:.2f}",
    transform=ax.transAxes,
    ha="left",
    va="top",
    fontsize=9,
)

# Source/credit footer in the bottom-left corner of the figure.
fig.text(
    0.01,
    0.01,
    "data: markbroumand graph: @iamreddave",
    ha="left",
    va="bottom",
    fontsize=9,
)

# Annotate the selected states, each label offset slightly from its marker.
labelled = plot_df[plot_df["state"].isin(label_states)]
for name, cost_usd, length_years in zip(
    labelled["state"],
    labelled["wedding_cost_usd"],
    labelled["marriage_length_years"],
):
    ax.annotate(
        name,
        (cost_usd, length_years),
        textcoords="offset points",
        xytext=(8, 6),
        ha="left",
        fontsize=9,
    )

# Reserve a strip at the bottom of the figure so the footer is not overlapped.
plt.tight_layout(rect=[0, 0.03, 1, 1])
plt.show()
Author
cavedave
commented
Feb 17, 2026
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment