zbowling · June 4, 2026 04:25
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.11"
 # dependencies = [
 #   "requests",
 #   "geopandas",
 #   "matplotlib",
 #   "pandas",
 #   "shapely",
 # ]
 # ///
 """
 California governor primary — county map of Dem vs Rep vote share.

 Re-run any time as more results arrive:
    uv run ca_gov_map.py

 Outputs:
    ca_gov_map.png        choropleth (dark blue Dem -> white -> dark red Rep)
    ca_gov_results.csv    per-county Dem/Rep totals + margin
 """

 from __future__ import annotations

 import argparse
 import json
 import sys
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path

 import geopandas as gpd
 import matplotlib.pyplot as plt
 import pandas as pd
 import requests
 from matplotlib.colors import TwoSlopeNorm

 # The 58 county display names to fetch, in alphabetical order. fetch_county()
 # turns each name into an API slug via slugify() and also returns it as the
 # "county" field that render_map() joins against the geometry file.
 COUNTIES = [
    "Alameda", "Alpine", "Amador", "Butte", "Calaveras", "Colusa",
    "Contra Costa", "Del Norte", "El Dorado", "Fresno", "Glenn",
    "Humboldt", "Imperial", "Inyo", "Kern", "Kings", "Lake", "Lassen",
    "Los Angeles", "Madera", "Marin", "Mariposa", "Mendocino", "Merced",
    "Modoc", "Mono", "Monterey", "Napa", "Nevada", "Orange", "Placer",
    "Plumas", "Riverside", "Sacramento", "San Benito", "San Bernardino",
    "San Diego", "San Francisco", "San Joaquin", "San Luis Obispo",
    "San Mateo", "Santa Barbara", "Santa Clara", "Santa Cruz", "Shasta",
    "Sierra", "Siskiyou", "Solano", "Sonoma", "Stanislaus", "Sutter",
    "Tehama", "Trinity", "Tulare", "Tuolumne", "Ventura", "Yolo", "Yuba",
 ]

 # Base URL for per-county results; fetch_county() appends "/<slug>".
 API_BASE = "https://api.sos.ca.gov/returns/governor/county"
 # Where load_geojson() downloads the county boundary geometry from.
 GEOJSON_URL = (
    "https://raw.githubusercontent.com/codeforgermany/click_that_hood/"
    "main/public/data/california-counties.geojson"
 )
 # Directory containing this script; default location for the cache and outputs.
 HERE = Path(__file__).resolve().parent
 # Local copy of the geometry; load_geojson() only downloads when it is absent.
 GEOJSON_CACHE = HERE / "california-counties.geojson"


 def slugify(name: str) -> str:
    """Return the URL slug the SoS API expects for a county name.

    Multi-word counties are hyphenated (e.g. ``san-mateo``). A concatenated
    slug such as ``sanmateo`` does not 404; it silently returns the statewide
    payload, which would corrupt the aggregates.
    """
    words = name.lower().split(" ")
    return "-".join(words)


 def parse_votes(v: str) -> int:
    return int(v.replace(",", "").strip() or 0)


 def fetch_county(name: str, session: requests.Session) -> dict:
    """Fetch one county's governor returns and total the votes by party.

    Args:
        name: County display name, e.g. "San Mateo".
        session: Session used to issue the GET request.

    Returns:
        A dict with keys county, dem, rep, other, total, top_candidate,
        top_party, top_votes, reporting and reporting_time. "other" collects
        every candidate whose Party is neither "Dem" nor "Rep".

    Raises:
        RuntimeError: if the payload's raceTitle does not contain the county
            name (case-insensitive).

    Errors from the GET, ``raise_for_status()`` or ``json()`` propagate.
    """
    response = session.get(f"{API_BASE}/{slugify(name)}", timeout=20)
    response.raise_for_status()
    body = response.json()
    # A list response is reduced to its first element.
    payload = body[0] if isinstance(body, list) else body

    # A wrong slug yields statewide data instead of an error, so confirm the
    # race title names this county before trusting the numbers.
    title = payload.get("raceTitle", "")
    if name.lower() not in title.lower():
        raise RuntimeError(
            f"slug mismatch for {name!r}: API returned raceTitle={title!r} "
            f"(likely got statewide instead of county data — check slug)"
        )

    totals = {"Dem": 0, "Rep": 0, "other": 0}
    leader_votes = 0
    leader_name = leader_party = ""
    for cand in payload.get("candidates", []):
        count = parse_votes(cand.get("Votes", "0"))
        party = cand.get("Party", "")
        totals[party if party in ("Dem", "Rep") else "other"] += count
        # Strictly greater: on a tie the earlier-listed candidate stays on top.
        if count > leader_votes:
            leader_votes = count
            leader_name = cand.get("Name", "")
            leader_party = party

    return {
        "county": name,
        "dem": totals["Dem"],
        "rep": totals["Rep"],
        "other": totals["other"],
        "total": totals["Dem"] + totals["Rep"] + totals["other"],
        "top_candidate": leader_name,
        "top_party": leader_party,
        "top_votes": leader_votes,
        "reporting": payload.get("Reporting", ""),
        "reporting_time": payload.get("ReportingTime", ""),
    }


 def fetch_all(verbose: bool = True) -> pd.DataFrame:
    """Fetch every county in COUNTIES concurrently and build the results table.

    A county whose fetch raises is reported on stderr and left out of the
    table; the remaining counties are still returned.

    Args:
        verbose: Print a progress line for each county fetched.

    Returns:
        One row per fetched county, sorted by county name, holding the fields
        from fetch_county() plus dem_pct, rep_pct and margin_pp. When no
        county could be fetched the frame is empty but keeps every column.
    """
    rows: list[dict] = []
    errors: list[tuple[str, str]] = []
    # NOTE(review): one Session is shared by all worker threads; confirm that
    # is acceptable for the requests version in use.
    with requests.Session() as s:
        s.headers["User-Agent"] = "ca-gov-map/1.0"
        with ThreadPoolExecutor(max_workers=8) as pool:
            futures = {pool.submit(fetch_county, c, s): c for c in COUNTIES}
            for fut in as_completed(futures):
                county = futures[fut]
                try:
                    row = fut.result()
                except Exception as e:
                    # Deliberate best effort: one bad county must not sink the run.
                    errors.append((county, str(e)))
                    print(f"  FAILED {county}: {e}", file=sys.stderr)
                else:
                    rows.append(row)
                    if verbose:
                        print(f"  fetched {county}")

    if errors:
        print(f"\n{len(errors)} county fetch(es) failed.", file=sys.stderr)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # pd.DataFrame([]) has no columns, so the sort below would raise
        # KeyError: 'county'. Build an empty frame with the fetch_county()
        # schema instead so callers get a usable (empty) table.
        df = pd.DataFrame(
            columns=[
                "county", "dem", "rep", "other", "total", "top_candidate",
                "top_party", "top_votes", "reporting", "reporting_time",
            ]
        ).astype({c: "int64" for c in ("dem", "rep", "other", "total", "top_votes")})

    df = df.sort_values("county").reset_index(drop=True)
    # Counties with no votes yet divide by 1 instead of 0, giving 0% not NaN.
    denom = df["total"].where(df["total"] > 0, 1)
    df["dem_pct"] = df["dem"] / denom * 100
    df["rep_pct"] = df["rep"] / denom * 100
    # Signed margin in percentage points: + = Dem lean, - = Rep lean.
    df["margin_pp"] = df["dem_pct"] - df["rep_pct"]
    return df


 def load_geojson() -> gpd.GeoDataFrame:
    """Load California county geometry, downloading it on first use.

    The file is fetched from GEOJSON_URL only when GEOJSON_CACHE does not
    exist yet; otherwise the cached copy is read.

    Returns:
        A GeoDataFrame with two columns: "county" (whitespace-stripped name)
        and "geometry".

    Raises:
        RuntimeError: if none of the known name columns is present.
    """
    if not GEOJSON_CACHE.exists():
        print(f"Downloading CA county geometry -> {GEOJSON_CACHE.name}")
        response = requests.get(GEOJSON_URL, timeout=30)
        response.raise_for_status()
        GEOJSON_CACHE.write_bytes(response.content)

    shapes = gpd.read_file(GEOJSON_CACHE)

    # The source file uses lowercase 'name'; the other spellings are fallbacks.
    for candidate in ("name", "NAME", "County", "county"):
        if candidate in shapes.columns:
            break
    else:
        raise RuntimeError(f"County name column not found in {shapes.columns.tolist()}")

    shapes = shapes.rename(columns={candidate: "county"})
    shapes["county"] = shapes["county"].str.strip()
    return shapes[["county", "geometry"]]


 def render_map(df: pd.DataFrame, out_path: Path, reporting_time: str) -> None:
    """Draw the county choropleth of the Dem - Rep margin and save it.

    Args:
        df: Per-county results with at least "county" and "margin_pp".
        out_path: Destination image file.
        reporting_time: Text shown after "Updated:" in the title.

    Counties present in the geometry but without a margin after the merge are
    listed in a warning on stderr and styled via ``missing_kwds``.
    """
    palette = "RdBu"  # red at low values, blue at high -> matches political convention
    border = "#333333"

    # Left join keeps every county shape even when its results are missing.
    merged = load_geojson().merge(df, on="county", how="left")

    no_data = merged.loc[merged["margin_pp"].isna(), "county"].tolist()
    if no_data:
        print(f"WARN: no data merged for: {no_data}", file=sys.stderr)

    # Symmetric diverging scale anchored on the actual data extremes,
    # but at least +/-30pp so light leans stay visible.
    widest = merged["margin_pp"].abs().max(skipna=True)
    extreme = max(30.0, float(widest or 30.0))
    norm = TwoSlopeNorm(vmin=-extreme, vcenter=0.0, vmax=extreme)

    fig, ax = plt.subplots(figsize=(10, 12))
    merged.plot(
        column="margin_pp",
        cmap=palette,
        norm=norm,
        linewidth=0.4,
        edgecolor=border,
        ax=ax,
        missing_kwds={"color": "lightgray", "edgecolor": border, "hatch": "///"},
    )
    ax.set_axis_off()
    title = (
        "California Governor Primary — Dem vs Rep vote share by county\n"
        "(margin = Dem% − Rep%, blue = Dem lean, red = Rep lean)\n"
        f"Updated: {reporting_time}"
    )
    ax.set_title(title, fontsize=12)

    # Stand-alone mappable so the colorbar uses the same colormap and norm.
    mappable = plt.cm.ScalarMappable(cmap=palette, norm=norm)
    mappable.set_array([])
    bar = fig.colorbar(
        mappable, ax=ax, orientation="horizontal", fraction=0.04, pad=0.02
    )
    bar.set_label("Dem − Rep margin (percentage points of all governor votes)")

    fig.tight_layout()
    fig.savefig(out_path, dpi=180, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved map -> {out_path}")


 def print_misleading_counties(df: pd.DataFrame) -> None:
    """Print counties led by a Republican where Democrats hold a majority.

    A county qualifies when its top_party is "Rep" while dem_pct is above 50.
    Qualifying rows are listed by descending dem_pct; "(none)" is printed
    when there are no such counties.
    """
    rule = "=" * 78
    led_by_rep = df["top_party"] == "Rep"
    dem_majority = df["dem_pct"] > 50.0
    flipped = df[led_by_rep & dem_majority].sort_values("dem_pct", ascending=False)

    print("\n" + rule)
    print("Counties where the top vote-getter was REPUBLICAN")
    print("but >50% of all governor votes went to Democratic candidates:")
    print(rule)

    if flipped.empty:
        print("  (none)")
        print()
        return

    print(
        f"{'County':<20} {'Top candidate':<28} {'Dem %':>7} {'Rep %':>7} "
        f"{'Margin':>8}"
    )
    fields = ("county", "top_candidate", "dem_pct", "rep_pct", "margin_pp")
    for county, leader, dem, rep, margin in zip(*(flipped[f] for f in fields)):
        print(
            f"{county:<20} {leader:<28} "
            f"{dem:>6.1f}% {rep:>6.1f}% "
            f"{margin:>+7.1f}"
        )
    print()


 def main() -> int:
    """Fetch all counties, write the CSV and the map, and print summaries.

    Command-line options:
        --out  path of the PNG map (default: ca_gov_map.png next to the script)
        --csv  path of the per-county CSV (default: ca_gov_results.csv next to
               the script)

    Returns:
        0, which the caller uses as the process exit status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default=str(HERE / "ca_gov_map.png"))
    ap.add_argument("--csv", default=str(HERE / "ca_gov_results.csv"))
    args = ap.parse_args()

    t0 = time.time()
    print(f"Fetching {len(COUNTIES)} counties from {API_BASE}/<county> ...")
    df = fetch_all()
    print(f"Fetched in {time.time() - t0:.1f}s")

    df.to_csv(args.csv, index=False)
    print(f"Saved per-county totals -> {args.csv}")

    # Stamp the map with the most common ReportingTime across counties.
    # Guard on the non-null values: mode() of an empty series has no row 0.
    times = df["reporting_time"].dropna()
    reporting_time = times.mode().iat[0] if not times.empty else ""
    render_map(df, Path(args.out), reporting_time)
    print_misleading_counties(df)

    total_dem = int(df["dem"].sum())
    total_rep = int(df["rep"].sum())
    total = int(df["total"].sum())
    # Before any ballots are counted total is 0; report 0.0% instead of
    # dying with ZeroDivisionError after the outputs were already written.
    dem_share = total_dem / total * 100 if total else 0.0
    rep_share = total_rep / total * 100 if total else 0.0
    print(
        f"Statewide (sum of county returns): "
        f"Dem {total_dem:,} ({dem_share:.1f}%)  "
        f"Rep {total_rep:,} ({rep_share:.1f}%)  "
        f"Other {total - total_dem - total_rep:,}"
    )
    return 0


 # Run only when executed as a script; main()'s return value is the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
	#!/usr/bin/env -S uv run --script
	# /// script
	# requires-python = ">=3.11"
	# dependencies = [
	# "requests",
	# "geopandas",
	# "matplotlib",
	# "pandas",
	# "shapely",
	# ]
	# ///
	"""
	California governor primary — county map of Dem vs Rep vote share.

	Re-run any time as more results arrive:
	    uv run ca_gov_map.py

	Outputs:
	    ca_gov_map.png        choropleth (dark blue Dem -> white -> dark red Rep)
	    ca_gov_results.csv    per-county Dem/Rep totals + margin
	"""

	from __future__ import annotations

	import argparse
	import json
	import sys
	import time
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from pathlib import Path

	import geopandas as gpd
	import matplotlib.pyplot as plt
	import pandas as pd
	import requests
	from matplotlib.colors import TwoSlopeNorm

	# The 58 county display names to fetch, in alphabetical order. fetch_county()
	# turns each name into an API slug via slugify() and also returns it as the
	# "county" field that render_map() joins against the geometry file.
	COUNTIES = [
	"Alameda", "Alpine", "Amador", "Butte", "Calaveras", "Colusa",
	"Contra Costa", "Del Norte", "El Dorado", "Fresno", "Glenn",
	"Humboldt", "Imperial", "Inyo", "Kern", "Kings", "Lake", "Lassen",
	"Los Angeles", "Madera", "Marin", "Mariposa", "Mendocino", "Merced",
	"Modoc", "Mono", "Monterey", "Napa", "Nevada", "Orange", "Placer",
	"Plumas", "Riverside", "Sacramento", "San Benito", "San Bernardino",
	"San Diego", "San Francisco", "San Joaquin", "San Luis Obispo",
	"San Mateo", "Santa Barbara", "Santa Clara", "Santa Cruz", "Shasta",
	"Sierra", "Siskiyou", "Solano", "Sonoma", "Stanislaus", "Sutter",
	"Tehama", "Trinity", "Tulare", "Tuolumne", "Ventura", "Yolo", "Yuba",
	]

	# Base URL for per-county results; fetch_county() appends "/<slug>".
	API_BASE = "https://api.sos.ca.gov/returns/governor/county"
	# Where load_geojson() downloads the county boundary geometry from.
	GEOJSON_URL = (
	"https://raw.githubusercontent.com/codeforgermany/click_that_hood/"
	"main/public/data/california-counties.geojson"
	)
	# Directory containing this script; default location for the cache and outputs.
	HERE = Path(__file__).resolve().parent
	# Local copy of the geometry; load_geojson() only downloads when it is absent.
	GEOJSON_CACHE = HERE / "california-counties.geojson"


	def slugify(name: str) -> str:
	    """Return the URL slug the SoS API expects for a county name.

	    Multi-word counties are hyphenated (e.g. ``san-mateo``). A concatenated
	    slug such as ``sanmateo`` does not 404; it silently returns the statewide
	    payload, which would corrupt the aggregates.
	    """
	    return name.lower().replace(" ", "-")


	def parse_votes(v: str) -> int:
	return int(v.replace(",", "").strip() or 0)


	def fetch_county(name: str, session: requests.Session) -> dict:
	    """Fetch one county's governor returns and total the votes by party.

	    Args:
	        name: County display name, e.g. "San Mateo".
	        session: Session used to issue the GET request.

	    Returns:
	        A dict with keys county, dem, rep, other, total, top_candidate,
	        top_party, top_votes, reporting and reporting_time. "other" collects
	        every candidate whose Party is neither "Dem" nor "Rep".

	    Raises:
	        RuntimeError: if the payload's raceTitle does not contain the county
	            name (case-insensitive).

	    Errors from the GET, ``raise_for_status()`` or ``json()`` propagate.
	    """
	    url = f"{API_BASE}/{slugify(name)}"
	    r = session.get(url, timeout=20)
	    r.raise_for_status()
	    data = r.json()
	    # A list response is reduced to its first element.
	    payload = data[0] if isinstance(data, list) else data

	    # A wrong slug yields statewide data instead of an error, so confirm the
	    # race title names this county before trusting the numbers.
	    title = payload.get("raceTitle", "")
	    if name.lower() not in title.lower():
	        raise RuntimeError(
	            f"slug mismatch for {name!r}: API returned raceTitle={title!r} "
	            f"(likely got statewide instead of county data — check slug)"
	        )

	    candidates = payload.get("candidates", [])

	    dem = rep = other = top_votes = 0
	    top_name = top_party = ""
	    for c in candidates:
	        votes = parse_votes(c.get("Votes", "0"))
	        party = c.get("Party", "")
	        if party == "Dem":
	            dem += votes
	        elif party == "Rep":
	            rep += votes
	        else:
	            other += votes
	        # Strictly greater: on a tie the earlier-listed candidate stays on top.
	        if votes > top_votes:
	            top_votes = votes
	            top_name = c.get("Name", "")
	            top_party = party

	    return {
	        "county": name,
	        "dem": dem,
	        "rep": rep,
	        "other": other,
	        "total": dem + rep + other,
	        "top_candidate": top_name,
	        "top_party": top_party,
	        "top_votes": top_votes,
	        "reporting": payload.get("Reporting", ""),
	        "reporting_time": payload.get("ReportingTime", ""),
	    }


	def fetch_all(verbose: bool = True) -> pd.DataFrame:
	    """Fetch every county in COUNTIES concurrently and build the results table.

	    A county whose fetch raises is reported on stderr and left out of the
	    table; the remaining counties are still returned.

	    Args:
	        verbose: Print a progress line for each county fetched.

	    Returns:
	        One row per fetched county, sorted by county name, holding the fields
	        from fetch_county() plus dem_pct, rep_pct and margin_pp. When no
	        county could be fetched the frame is empty but keeps every column.
	    """
	    rows: list[dict] = []
	    errors: list[tuple[str, str]] = []
	    # NOTE(review): one Session is shared by all worker threads; confirm that
	    # is acceptable for the requests version in use.
	    with requests.Session() as s:
	        s.headers["User-Agent"] = "ca-gov-map/1.0"
	        with ThreadPoolExecutor(max_workers=8) as pool:
	            futures = {pool.submit(fetch_county, c, s): c for c in COUNTIES}
	            for fut in as_completed(futures):
	                county = futures[fut]
	                try:
	                    row = fut.result()
	                except Exception as e:
	                    # Deliberate best effort: one bad county must not sink the run.
	                    errors.append((county, str(e)))
	                    print(f" FAILED {county}: {e}", file=sys.stderr)
	                else:
	                    rows.append(row)
	                    if verbose:
	                        print(f" fetched {county}")

	    if errors:
	        print(f"\n{len(errors)} county fetch(es) failed.", file=sys.stderr)

	    if rows:
	        df = pd.DataFrame(rows)
	    else:
	        # pd.DataFrame([]) has no columns, so the sort below would raise
	        # KeyError: 'county'. Build an empty frame with the fetch_county()
	        # schema instead so callers get a usable (empty) table.
	        df = pd.DataFrame(
	            columns=[
	                "county", "dem", "rep", "other", "total", "top_candidate",
	                "top_party", "top_votes", "reporting", "reporting_time",
	            ]
	        ).astype({c: "int64" for c in ("dem", "rep", "other", "total", "top_votes")})

	    df = df.sort_values("county").reset_index(drop=True)
	    # Counties with no votes yet divide by 1 instead of 0, giving 0% not NaN.
	    denom = df["total"].where(df["total"] > 0, 1)
	    df["dem_pct"] = df["dem"] / denom * 100
	    df["rep_pct"] = df["rep"] / denom * 100
	    # Signed margin in percentage points: + = Dem lean, - = Rep lean.
	    df["margin_pp"] = df["dem_pct"] - df["rep_pct"]
	    return df


	def load_geojson() -> gpd.GeoDataFrame:
	    """Load California county geometry, downloading it on first use.

	    The file is fetched from GEOJSON_URL only when GEOJSON_CACHE does not
	    exist yet; otherwise the cached copy is read.

	    Returns:
	        A GeoDataFrame with two columns: "county" (whitespace-stripped name)
	        and "geometry".

	    Raises:
	        RuntimeError: if none of the known name columns is present.
	    """
	    if not GEOJSON_CACHE.exists():
	        print(f"Downloading CA county geometry -> {GEOJSON_CACHE.name}")
	        r = requests.get(GEOJSON_URL, timeout=30)
	        r.raise_for_status()
	        GEOJSON_CACHE.write_bytes(r.content)
	    gdf = gpd.read_file(GEOJSON_CACHE)
	    # The source file uses lowercase 'name' for the county name.
	    name_col = next(
	        (c for c in ("name", "NAME", "County", "county") if c in gdf.columns),
	        None,
	    )
	    if name_col is None:
	        raise RuntimeError(f"County name column not found in {gdf.columns.tolist()}")
	    gdf = gdf.rename(columns={name_col: "county"})
	    gdf["county"] = gdf["county"].str.strip()
	    return gdf[["county", "geometry"]]


	def render_map(df: pd.DataFrame, out_path: Path, reporting_time: str) -> None:
	    """Draw the county choropleth of the Dem - Rep margin and save it.

	    Args:
	        df: Per-county results with at least "county" and "margin_pp".
	        out_path: Destination image file.
	        reporting_time: Text shown after "Updated:" in the title.

	    Counties present in the geometry but without a margin after the merge are
	    listed in a warning on stderr and styled via ``missing_kwds``.
	    """
	    # Left join keeps every county shape even when its results are missing.
	    gdf = load_geojson().merge(df, on="county", how="left")

	    missing = gdf[gdf["margin_pp"].isna()]["county"].tolist()
	    if missing:
	        print(f"WARN: no data merged for: {missing}", file=sys.stderr)

	    # Symmetric diverging scale anchored on the actual data extremes,
	    # but at least +/-30pp so light leans stay visible.
	    extreme = max(30.0, float(gdf["margin_pp"].abs().max(skipna=True) or 30.0))
	    norm = TwoSlopeNorm(vmin=-extreme, vcenter=0.0, vmax=extreme)

	    fig, ax = plt.subplots(figsize=(10, 12))
	    gdf.plot(
	        column="margin_pp",
	        cmap="RdBu",  # red at low values, blue at high -> matches political convention
	        norm=norm,
	        linewidth=0.4,
	        edgecolor="#333333",
	        ax=ax,
	        missing_kwds={"color": "lightgray", "edgecolor": "#333333", "hatch": "///"},
	    )
	    ax.set_axis_off()
	    ax.set_title(
	        "California Governor Primary — Dem vs Rep vote share by county\n"
	        f"(margin = Dem% − Rep%, blue = Dem lean, red = Rep lean)\n"
	        f"Updated: {reporting_time}",
	        fontsize=12,
	    )

	    # Colorbar: stand-alone mappable sharing the map's colormap and norm.
	    sm = plt.cm.ScalarMappable(cmap="RdBu", norm=norm)
	    sm.set_array([])
	    cbar = fig.colorbar(sm, ax=ax, orientation="horizontal", fraction=0.04, pad=0.02)
	    cbar.set_label("Dem − Rep margin (percentage points of all governor votes)")

	    fig.tight_layout()
	    fig.savefig(out_path, dpi=180, bbox_inches="tight")
	    plt.close(fig)
	    print(f"Saved map -> {out_path}")


	def print_misleading_counties(df: pd.DataFrame) -> None:
	    """Print counties led by a Republican where Democrats hold a majority.

	    A county qualifies when its top_party is "Rep" while dem_pct is above 50.
	    Qualifying rows are listed by descending dem_pct; "(none)" is printed
	    when there are no such counties.
	    """
	    mask = (df["top_party"] == "Rep") & (df["dem_pct"] > 50.0)
	    flipped = df[mask].sort_values("dem_pct", ascending=False)

	    print("\n" + "=" * 78)
	    print("Counties where the top vote-getter was REPUBLICAN")
	    print("but >50% of all governor votes went to Democratic candidates:")
	    print("=" * 78)
	    if flipped.empty:
	        print(" (none)")
	    else:
	        print(
	            f"{'County':<20} {'Top candidate':<28} {'Dem %':>7} {'Rep %':>7} "
	            f"{'Margin':>8}"
	        )
	        for _, r in flipped.iterrows():
	            print(
	                f"{r['county']:<20} {r['top_candidate']:<28} "
	                f"{r['dem_pct']:>6.1f}% {r['rep_pct']:>6.1f}% "
	                f"{r['margin_pp']:>+7.1f}"
	            )
	    print()


	def main() -> int:
	    """Fetch all counties, write the CSV and the map, and print summaries.

	    Command-line options:
	        --out  path of the PNG map (default: ca_gov_map.png next to the script)
	        --csv  path of the per-county CSV (default: ca_gov_results.csv next to
	               the script)

	    Returns:
	        0, which the caller uses as the process exit status.
	    """
	    ap = argparse.ArgumentParser()
	    ap.add_argument("--out", default=str(HERE / "ca_gov_map.png"))
	    ap.add_argument("--csv", default=str(HERE / "ca_gov_results.csv"))
	    args = ap.parse_args()

	    t0 = time.time()
	    print(f"Fetching {len(COUNTIES)} counties from {API_BASE}/<county> ...")
	    df = fetch_all()
	    print(f"Fetched in {time.time() - t0:.1f}s")

	    df.to_csv(args.csv, index=False)
	    print(f"Saved per-county totals -> {args.csv}")

	    # Stamp the map with the most common ReportingTime across counties.
	    # Guard on the non-null values: mode() of an empty series has no row 0.
	    times = df["reporting_time"].dropna()
	    reporting_time = times.mode().iat[0] if not times.empty else ""
	    render_map(df, Path(args.out), reporting_time)
	    print_misleading_counties(df)

	    total_dem = int(df["dem"].sum())
	    total_rep = int(df["rep"].sum())
	    total = int(df["total"].sum())
	    # Before any ballots are counted total is 0; report 0.0% instead of
	    # dying with ZeroDivisionError after the outputs were already written.
	    dem_share = total_dem / total * 100 if total else 0.0
	    rep_share = total_rep / total * 100 if total else 0.0
	    print(
	        f"Statewide (sum of county returns): "
	        f"Dem {total_dem:,} ({dem_share:.1f}%) "
	        f"Rep {total_rep:,} ({rep_share:.1f}%) "
	        f"Other {total - total_dem - total_rep:,}"
	    )
	    return 0


	# Run only when executed as a script; main()'s return value is the exit status.
	if __name__ == "__main__":
	    sys.exit(main())
No results found