Created
June 4, 2026 04:25
-
-
Save zbowling/3c5efe3a6dd48800c8214a8a3571ad1d to your computer and use it in GitHub Desktop.
gov_race_map.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.11" | |
| # dependencies = [ | |
| # "requests", | |
| # "geopandas", | |
| # "matplotlib", | |
| # "pandas", | |
| # "shapely", | |
| # ] | |
| # /// | |
| """ | |
| California governor primary — county map of Dem vs Rep vote share. | |
| Re-run any time as more results arrive: | |
| uv run ca_gov_map.py | |
| Outputs: | |
| ca_gov_map.png choropleth (dark blue Dem -> white -> dark red Rep) | |
| ca_gov_results.csv per-county Dem/Rep totals + margin | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import sys | |
| import time | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from pathlib import Path | |
| import geopandas as gpd | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import requests | |
| from matplotlib.colors import TwoSlopeNorm | |
# All 58 county names queried from the API and joined against the map
# geometry, in alphabetical order (one row per initial letter where it fits).
COUNTIES = [
    "Alameda", "Alpine", "Amador",
    "Butte",
    "Calaveras", "Colusa", "Contra Costa",
    "Del Norte",
    "El Dorado",
    "Fresno",
    "Glenn",
    "Humboldt",
    "Imperial", "Inyo",
    "Kern", "Kings",
    "Lake", "Lassen", "Los Angeles",
    "Madera", "Marin", "Mariposa", "Mendocino",
    "Merced", "Modoc", "Mono", "Monterey",
    "Napa", "Nevada",
    "Orange",
    "Placer", "Plumas",
    "Riverside",
    "Sacramento", "San Benito", "San Bernardino", "San Diego",
    "San Francisco", "San Joaquin", "San Luis Obispo", "San Mateo",
    "Santa Barbara", "Santa Clara", "Santa Cruz",
    "Shasta", "Sierra", "Siskiyou", "Solano", "Sonoma", "Stanislaus", "Sutter",
    "Tehama", "Trinity", "Tulare", "Tuolumne",
    "Ventura",
    "Yolo", "Yuba",
]

# Per-county results endpoint; the county slug is appended as a path segment.
API_BASE = "https://api.sos.ca.gov/returns/governor/county"

# County boundary polygons, downloaded once and cached next to this script.
GEOJSON_URL = "/".join(
    (
        "https://raw.githubusercontent.com",
        "codeforgermany",
        "click_that_hood",
        "main/public/data",
        "california-counties.geojson",
    )
)

# Directory containing this script; default location for cache and outputs.
HERE = Path(__file__).resolve().parents[0]
GEOJSON_CACHE = HERE.joinpath("california-counties.geojson")
def slugify(name: str) -> str:
    """Return the URL slug for a county name.

    The name is lower-cased and every space is turned into a hyphen,
    e.g. ``"San Mateo"`` -> ``"san-mateo"``.
    """
    # Hyphens matter: the SoS API uses them for multi-word counties, and a
    # concatenated slug (sanmateo) silently returns the statewide payload
    # instead of a 404, which would corrupt the aggregates.
    lowered = name.lower()
    return "-".join(lowered.split(" "))
def parse_votes(v: str | int | float | None) -> int:
    """Convert a vote count from the API payload into an ``int``.

    Args:
        v: The raw count. Normally a string that may contain thousands
            separators and surrounding whitespace (``"1,234"``). ``None``
            and plain numbers are also accepted so that a JSON ``null`` or
            a numeric field does not crash the caller.

    Returns:
        The count as an integer; ``0`` for ``None``, an empty string, or a
        string containing only whitespace.

    Raises:
        ValueError: If the text left after removing commas is not an integer.
    """
    if v is None:
        return 0
    if isinstance(v, (int, float)):
        return int(v)
    digits = str(v).replace(",", "").strip()
    return int(digits) if digits else 0
def fetch_county(name: str, session: requests.Session) -> dict:
    """Fetch one county's governor returns and tally them by party.

    Args:
        name: County name as listed in ``COUNTIES`` (e.g. ``"San Mateo"``).
        session: Session used to issue the GET request.

    Returns:
        A dict with the keys ``county``, ``dem``, ``rep``, ``other``,
        ``total``, ``top_candidate``, ``top_party``, ``top_votes``,
        ``reporting`` and ``reporting_time``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        RuntimeError: If the response is empty or not a JSON object, or if
            its ``raceTitle`` does not mention the requested county.
    """
    url = f"{API_BASE}/{slugify(name)}"
    r = session.get(url, timeout=20)
    r.raise_for_status()
    data = r.json()

    # The response is either a single object or a list whose first element
    # is used. Fail with a readable message instead of a bare IndexError
    # when the list is empty.
    if isinstance(data, list):
        if not data:
            raise RuntimeError(f"empty response for {name!r} from {url}")
        payload = data[0]
    else:
        payload = data
    if not isinstance(payload, dict):
        raise RuntimeError(
            f"unexpected response for {name!r} from {url}: "
            f"{type(payload).__name__}"
        )

    # ``or`` (rather than a .get default) also covers keys present as null.
    title = payload.get("raceTitle") or ""
    if name.lower() not in title.lower():
        raise RuntimeError(
            f"slug mismatch for {name!r}: API returned raceTitle={title!r} "
            f"(likely got statewide instead of county data — check slug)"
        )

    dem = rep = other = top_votes = 0
    top_name = top_party = ""
    for c in payload.get("candidates") or []:
        # str() keeps this working whether the count arrives as text or as
        # a number; a null count is treated as zero.
        votes = parse_votes(str(c.get("Votes") or "0"))
        party = c.get("Party") or ""
        if party == "Dem":
            dem += votes
        elif party == "Rep":
            rep += votes
        else:
            other += votes
        # Strict '>' means the first candidate listed wins a tie.
        if votes > top_votes:
            top_votes = votes
            top_name = c.get("Name") or ""
            top_party = party

    return {
        "county": name,
        "dem": dem,
        "rep": rep,
        "other": other,
        "total": dem + rep + other,
        "top_candidate": top_name,
        "top_party": top_party,
        "top_votes": top_votes,
        "reporting": payload.get("Reporting", ""),
        "reporting_time": payload.get("ReportingTime", ""),
    }
def fetch_all(verbose: bool = True) -> pd.DataFrame:
    """Fetch every county in ``COUNTIES`` concurrently and build the results table.

    Counties whose fetch fails are reported on stderr and left out of the
    table; the remaining counties are still returned.

    Args:
        verbose: When true, print a line for each county fetched
            successfully. Failures are always printed.

    Returns:
        One row per successfully fetched county, sorted by county name,
        with the fields produced by ``fetch_county`` plus ``dem_pct``,
        ``rep_pct`` and ``margin_pp``.

    Raises:
        RuntimeError: If no county could be fetched at all.
    """
    rows: list[dict] = []
    errors: list[tuple[str, str]] = []
    with requests.Session() as s:
        s.headers["User-Agent"] = "ca-gov-map/1.0"
        # NOTE(review): one Session is shared by all worker threads —
        # confirm this is acceptable for the requests version in use.
        with ThreadPoolExecutor(max_workers=8) as pool:
            futures = {pool.submit(fetch_county, c, s): c for c in COUNTIES}
            for fut in as_completed(futures):
                county = futures[fut]
                try:
                    row = fut.result()
                except Exception as e:
                    # One bad county must not abort the others.
                    errors.append((county, str(e)))
                    print(f" FAILED {county}: {e}", file=sys.stderr)
                else:
                    rows.append(row)
                    if verbose:
                        print(f" fetched {county}")
    if errors:
        print(f"\n{len(errors)} county fetch(es) failed.", file=sys.stderr)
    if not rows:
        # An empty frame has no "county" column, so sorting below would die
        # with an opaque KeyError; say what actually went wrong instead.
        raise RuntimeError(
            f"all {len(COUNTIES)} county fetches failed; no results to report"
        )

    df = pd.DataFrame(rows).sort_values("county").reset_index(drop=True)
    # Counties with no votes yet divide by 1, so their shares come out as 0.
    denom = df["total"].where(df["total"] > 0, 1)
    df["dem_pct"] = df["dem"] / denom * 100
    df["rep_pct"] = df["rep"] / denom * 100
    # Signed margin in percentage points: + = Dem lean, - = Rep lean.
    df["margin_pp"] = df["dem_pct"] - df["rep_pct"]
    return df
def load_geojson() -> gpd.GeoDataFrame:
    """Load county geometry, downloading it to ``GEOJSON_CACHE`` on first use.

    Returns:
        A GeoDataFrame with exactly two columns: ``county`` (whitespace
        stripped) and ``geometry``.

    Raises:
        requests.HTTPError: If the download answers with an error status.
        RuntimeError: If none of the known name columns is present.
    """
    if not GEOJSON_CACHE.exists():
        print(f"Downloading CA county geometry -> {GEOJSON_CACHE.name}")
        response = requests.get(GEOJSON_URL, timeout=30)
        response.raise_for_status()
        GEOJSON_CACHE.write_bytes(response.content)

    counties = gpd.read_file(GEOJSON_CACHE)

    # The source file uses lowercase 'name'; the other spellings are
    # fallbacks, tried in this order.
    for candidate in ("name", "NAME", "County", "county"):
        if candidate in counties.columns:
            name_col = candidate
            break
    else:
        raise RuntimeError(f"County name column not found in {counties.columns.tolist()}")

    counties = counties.rename(columns={name_col: "county"})
    counties["county"] = counties["county"].str.strip()
    return counties[["county", "geometry"]]
def render_map(df: pd.DataFrame, out_path: Path, reporting_time: str) -> None:
    """Draw the county choropleth of the Dem-minus-Rep margin and save it.

    Args:
        df: Per-county results; must contain ``county`` and ``margin_pp``.
        out_path: Destination image file.
        reporting_time: Text shown after "Updated:" in the title.

    Counties present in the geometry but absent from ``df`` are listed in a
    warning on stderr and drawn hatched in light gray.
    """
    gdf = load_geojson().merge(df, on="county", how="left")
    missing = gdf.loc[gdf["margin_pp"].isna(), "county"].tolist()
    if missing:
        print(f"WARN: no data merged for: {missing}", file=sys.stderr)

    # Symmetric diverging scale anchored on the actual data extremes,
    # but at least +/-30pp so light leans stay visible. NaN is truthy, so
    # an "or 30.0" fallback never fires; test for it explicitly instead of
    # relying on how max() happens to order a NaN comparison.
    peak = gdf["margin_pp"].abs().max(skipna=True)
    extreme = 30.0 if pd.isna(peak) else max(30.0, float(peak))
    norm = TwoSlopeNorm(vmin=-extreme, vcenter=0.0, vmax=extreme)

    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        gdf.plot(
            column="margin_pp",
            cmap="RdBu",  # red at low values, blue at high -> matches political convention
            norm=norm,
            linewidth=0.4,
            edgecolor="#333333",
            ax=ax,
            missing_kwds={"color": "lightgray", "edgecolor": "#333333", "hatch": "///"},
        )
        ax.set_axis_off()
        ax.set_title(
            "California Governor Primary — Dem vs Rep vote share by county\n"
            "(margin = Dem% − Rep%, blue = Dem lean, red = Rep lean)\n"
            f"Updated: {reporting_time}",
            fontsize=12,
        )

        # Colorbar built from a standalone mappable that shares the map's
        # colormap and normalization.
        sm = plt.cm.ScalarMappable(cmap="RdBu", norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation="horizontal", fraction=0.04, pad=0.02)
        cbar.set_label("Dem − Rep margin (percentage points of all governor votes)")

        fig.tight_layout()
        fig.savefig(out_path, dpi=180, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving raises.
        plt.close(fig)
    print(f"Saved map -> {out_path}")
def print_misleading_counties(df: pd.DataFrame) -> None:
    """Print counties led by a Republican where Democrats together exceed 50%.

    A county is listed when its ``top_party`` is ``"Rep"`` and its
    ``dem_pct`` is above 50. Rows are ordered by ``dem_pct``, highest
    first; ``(none)`` is printed when no county qualifies.
    """
    rep_on_top = df["top_party"] == "Rep"
    dem_majority = df["dem_pct"] > 50.0
    flipped = df.loc[rep_on_top & dem_majority].sort_values("dem_pct", ascending=False)

    rule = "=" * 78
    print("\n" + rule)
    print("Counties where the top vote-getter was REPUBLICAN")
    print("but >50% of all governor votes went to Democratic candidates:")
    print(rule)

    if not flipped.empty:
        print(
            f"{'County':<20} {'Top candidate':<28} {'Dem %':>7} {'Rep %':>7} "
            f"{'Margin':>8}"
        )
        shown = ["county", "top_candidate", "dem_pct", "rep_pct", "margin_pp"]
        for county, candidate, dem_pct, rep_pct, margin in flipped[shown].itertuples(
            index=False, name=None
        ):
            print(
                f"{county:<20} {candidate:<28} "
                f"{dem_pct:>6.1f}% {rep_pct:>6.1f}% "
                f"{margin:>+7.1f}"
            )
    else:
        print(" (none)")
    print()
def main() -> int:
    """Fetch all county returns, write the CSV and map, and print a summary.

    Command-line options:
        --out: Path of the map image (default: ``ca_gov_map.png`` next to
            this script).
        --csv: Path of the per-county CSV (default: ``ca_gov_results.csv``
            next to this script).

    Returns:
        ``0`` on completion, used as the process exit status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default=str(HERE / "ca_gov_map.png"))
    ap.add_argument("--csv", default=str(HERE / "ca_gov_results.csv"))
    args = ap.parse_args()

    t0 = time.time()
    print(f"Fetching {len(COUNTIES)} counties from {API_BASE}/<county> ...")
    df = fetch_all()
    print(f"Fetched in {time.time() - t0:.1f}s")

    df.to_csv(args.csv, index=False)
    print(f"Saved per-county totals -> {args.csv}")

    # Label the map with the most common reporting time. mode() is empty
    # when every value is null, so guard before indexing into it.
    reporting_time = ""
    if not df.empty:
        modes = df["reporting_time"].dropna().mode()
        if not modes.empty:
            reporting_time = modes.iat[0]

    render_map(df, Path(args.out), reporting_time)
    print_misleading_counties(df)

    total_dem = int(df["dem"].sum())
    total_rep = int(df["rep"].sum())
    total = int(df["total"].sum())

    def share(votes: int) -> float:
        # Before any votes are reported the statewide total is 0; show 0.0%
        # rather than raising ZeroDivisionError.
        return votes / total * 100 if total else 0.0

    print(
        f"Statewide (sum of county returns): "
        f"Dem {total_dem:,} ({share(total_dem):.1f}%) "
        f"Rep {total_rep:,} ({share(total_rep):.1f}%) "
        f"Other {total - total_dem - total_rep:,}"
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment