Last active
August 25, 2025 21:06
-
-
Save Sancus/fe2c841fa340901a284b361e799786ed to your computer and use it in GitHub Desktop.
A script that outputs two graphs showing the uptake rate of Thunderbird ESRs and the # of support questions relative to uptake, respectively.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# This Source Code Form is subject to the terms of the Mozilla Public | |
# License, v. 2.0. If a copy of the MPL was not distributed with this | |
# file, You can obtain one at https://mozilla.org/MPL/2.0/. | |
import json, re | |
from pathlib import Path | |
from typing import Dict, Any, Optional | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# ESR majors tracked, oldest -> newest, and each one's predecessor.
ESR_VERSIONS = [60, 68, 78, 91, 102, 115, 128, 140]
PREV_ESR = {60: 52, 68: 60, 78: 68, 91: 78, 102: 91, 115: 102, 128: 115, 140: 128}

# Data endpoints on stats.thunderbird.net.
BASE_URL = "https://stats.thunderbird.net/{ver}uptake.json"
SUPPORT_URL = "https://stats.thunderbird.net/sumo.json"

# Analysis knobs.
WEEKDAY_ONLY = True  # drop Saturday/Sunday samples
CUTOFF_DAYS = 200  # days after release to keep
BOLD_ESR = 140  # series drawn thicker and in blue
EXCLUDE_SUPPORT_ESRS = {60, 68}  # omitted from the support-vs-uptake chart

# Official release date for each ESR (ISO strings, parsed with pandas).
RELEASE_DATES = {
    60: "2018-08-06",
    68: "2019-08-27",
    78: "2020-07-17",
    91: "2021-08-11",
    102: "2022-06-29",
    115: "2023-07-11",
    128: "2024-07-11",
    140: "2025-07-07",
}

# Output artifacts.
UPTAKE_PNG = "tb_esr_uptake.png"
UPTAKE_MILESTONES_CSV = "tb_esr_uptake_milestones.csv"
SUPPORT_VS_UPTAKE_PNG = "tb_support_vs_uptake.png"
SUPPORT_VS_UPTAKE_CSV = "tb_support_vs_uptake_points.csv"
def try_fetch(url: str) -> Optional[str]:
    """Fetch *url* and return its body decoded as UTF-8, or None on any failure.

    Failures (network errors, bad URLs, even a broken urllib import) are
    swallowed deliberately so callers can fall back to local files.
    """
    try:
        import urllib.request as ureq
        request = ureq.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        response = ureq.urlopen(request, timeout=30)
        try:
            return response.read().decode("utf-8", errors="ignore")
        finally:
            response.close()
    except Exception:
        return None
def load_json_from_url_or_file(url: str, local_name: str) -> Optional[Dict[str, Any]]:
    """Load JSON from *url*, falling back to the local file *local_name*.

    Tolerates JS-style "var x = {...};" payloads by retrying the parse on
    everything after the first '=' with a trailing ';' stripped.
    Returns None when neither source yields parseable JSON.
    """
    text = try_fetch(url)
    if text is None:
        local = Path(local_name)
        if not local.exists():
            return None
        text = local.read_text(encoding="utf-8", errors="ignore")
    try:
        return json.loads(text)
    except Exception:
        pass
    try:
        if "=" in text:
            return json.loads(text.split("=", 1)[1].strip().rstrip(";"))
    except Exception:
        pass
    return None
def parse_major(key: str) -> Optional[int]:
    """Extract the leading integer major version from *key* (e.g. "115.2" -> 115)."""
    match = re.match(r"^(\d+)", str(key).strip())
    if match is None:
        return None
    return int(match.group(1))
def has_and_below(key: str) -> bool:
    """Return True for aggregate buckets like "52.0 and below"."""
    return str(key).find("and below") != -1
def load_all_uptake() -> Dict[int, Dict[str, Any]]:
    """Fetch per-version uptake JSON for every tracked ESR, skipping failures."""
    collected: Dict[int, Dict[str, Any]] = {}
    for ver in ESR_VERSIONS:
        payload = load_json_from_url_or_file(BASE_URL.format(ver=ver), f"{ver}uptake.json")
        if payload:
            collected[ver] = payload
    return collected
def compute_uptake_series(datasets: Dict[int, Dict[str, Any]], esr_ver: int) -> Optional[pd.DataFrame]:
    """Build a per-day uptake-% series for *esr_ver*, indexed by date.

    Uptake is the share of installs on exactly *esr_ver* among the cohort
    running the previous ESR or anything newer. Aggregate "X and below"
    buckets are excluded from both numerator and denominator. The series is
    aligned to the official release date, optionally filtered to weekdays,
    and truncated at CUTOFF_DAYS. Returns None when nothing usable exists.
    """
    if esr_ver not in datasets:
        return None
    prev = PREV_ESR.get(esr_ver)
    records = []
    for day, entry in sorted(datasets[esr_ver].items()):
        numerator = 0
        denominator = 0
        for key, count in entry.get("versions", {}).items():
            # strictly ignore "X and below" buckets
            if has_and_below(key):
                continue
            major = parse_major(key)
            if major is None or not isinstance(count, (int, float)):
                continue
            # numerator: exactly this ESR major
            if major == esr_ver:
                numerator += count
            # denominator: previous ESR and newer
            if prev is None or major >= prev:
                denominator += count
        if denominator > 0:
            records.append((pd.to_datetime(day), numerator / denominator * 100.0))
    if not records:
        return None
    series = pd.DataFrame(records, columns=["date", "uptake"])
    series = series.set_index("date").sort_index()
    # Align to official release date
    start = pd.to_datetime(RELEASE_DATES.get(esr_ver))
    series = series[series.index >= start]
    series["days"] = (series.index - start).days
    if WEEKDAY_ONLY:
        series = series[series.index.weekday < 5]
    return series[series["days"] <= CUTOFF_DAYS]
def load_support_series() -> Optional[pd.DataFrame]:
    """Load daily SUMO support-question counts as a date-indexed DataFrame."""
    payload = load_json_from_url_or_file(SUPPORT_URL, "sumo.json")
    if not payload:
        return None
    frame = pd.DataFrame(list(payload.items()), columns=["date", "questions"])
    frame["date"] = pd.to_datetime(frame["date"])
    frame = frame.set_index("date")
    return frame.sort_index()
def plot_uptake(datasets: Dict[int, Dict[str, Any]]) -> pd.DataFrame:
    """Plot every ESR's uptake curve and export milestone timings.

    Side effects: writes UPTAKE_PNG (chart) and UPTAKE_MILESTONES_CSV
    (days until 50/75/90% uptake per ESR). Returns all per-version series
    stacked into one long DataFrame with an added "ESR" column.
    Raises SystemExit when no series could be built at all.
    """
    series: Dict[int, pd.DataFrame] = {}
    for ver in ESR_VERSIONS:
        built = compute_uptake_series(datasets, ver)
        if built is not None and not built.empty:
            series[ver] = built
    if not series:
        raise SystemExit("No uptake series built.")
    plt.figure(figsize=(10, 6))
    for ver, frame in series.items():
        # Highlight the flagship ESR; default styling for the rest.
        extra = {"color": "blue", "linewidth": 2.5} if ver == BOLD_ESR else {}
        plt.plot(frame["days"], frame["uptake"], label=str(ver), **extra)
    note = "weekdays only, " if WEEKDAY_ONLY else ""
    plt.xlabel(f"Days since ESR release ({note}cutoff {CUTOFF_DAYS})")
    plt.ylabel("Uptake (%) among {prev ESR}+ cohort")
    plt.title("Thunderbird ESR Uptake")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.xlim(0, CUTOFF_DAYS)
    plt.tight_layout()
    plt.savefig(UPTAKE_PNG, dpi=160)
    # milestones
    milestone_rows = []
    for ver, frame in series.items():
        def first_day_at(threshold):
            # Earliest day index where uptake reached the threshold, if ever.
            reached = frame[frame["uptake"] >= threshold]
            return int(reached["days"].min()) if not reached.empty else None
        milestone_rows.append({
            "ESR": ver,
            "T50_days": first_day_at(50),
            "T75_days": first_day_at(75),
            "T90_days": first_day_at(90),
        })
    pd.DataFrame(milestone_rows).sort_values("ESR").to_csv(UPTAKE_MILESTONES_CSV, index=False)
    # return concat
    stacked = []
    for ver, frame in series.items():
        labeled = frame.copy()
        labeled["ESR"] = ver
        stacked.append(labeled.reset_index())
    return pd.concat(stacked, ignore_index=True)
def plot_support_vs_uptake(uptake_concat: pd.DataFrame, support_df: pd.DataFrame):
    """Plot weekly support-question volume against weekly mean uptake per ESR.

    For each ESR in *uptake_concat*, support counts are windowed to the
    CUTOFF_DAYS after release (weekdays only when WEEKDAY_ONLY), summed per
    week, and inner-joined with the weekly mean uptake. ESRs listed in
    EXCLUDE_SUPPORT_ESRS are dropped from both the chart and the CSV.
    Side effects: writes SUPPORT_VS_UPTAKE_PNG and SUPPORT_VS_UPTAKE_CSV.
    """
    weekly_by_ver: Dict[int, pd.DataFrame] = {}
    for ver in sorted(uptake_concat["ESR"].unique()):
        start = pd.to_datetime(RELEASE_DATES.get(ver))
        window = support_df[support_df.index >= start].copy()
        window["days"] = (window.index - start).days
        window = window[window["days"] <= CUTOFF_DAYS]
        if WEEKDAY_ONLY:
            window = window[window.index.weekday < 5]
        weekly_support = window["questions"].resample("W-MON").sum()
        per_ver = uptake_concat[uptake_concat["ESR"] == ver].set_index("date").sort_index()
        weekly_uptake = per_ver["uptake"].resample("W-MON").mean()
        merged = pd.concat([weekly_uptake, weekly_support], axis=1, join="inner").dropna()
        if not merged.empty:
            weekly_by_ver[ver] = merged
    plt.figure(figsize=(10, 6))
    for ver, merged in weekly_by_ver.items():
        if ver in EXCLUDE_SUPPORT_ESRS:
            continue
        extra = {"color": "blue", "linewidth": 2.5} if ver == BOLD_ESR else {}
        plt.plot(merged["uptake"], merged["questions"], label=str(ver), **extra)
    plt.xlabel("Uptake (%)")
    plt.ylabel("Support questions (#, weekly sum)")
    plt.title("Thunderbird Support Questions vs. Uptake")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.tight_layout()
    plt.savefig(SUPPORT_VS_UPTAKE_PNG, dpi=160)
    # export pairs
    rows = []
    for ver, merged in weekly_by_ver.items():
        if ver in EXCLUDE_SUPPORT_ESRS:
            continue
        for week, rec in merged.iterrows():
            rows.append({
                "ESR": ver,
                "week": week.date().isoformat(),
                "uptake_pct": round(float(rec["uptake"]), 4),
                "support_questions": int(rec["questions"]),
            })
    pd.DataFrame(rows).to_csv(SUPPORT_VS_UPTAKE_CSV, index=False)
def main():
    """Entry point: load both datasets, render the charts, write the CSVs."""
    datasets = load_all_uptake()
    support_df = load_support_series()
    if not datasets or support_df is None or support_df.empty:
        raise SystemExit("Missing datasets.")
    uptake_concat = plot_uptake(datasets)
    plot_support_vs_uptake(uptake_concat, support_df)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment