Created
June 17, 2026 20:03
-
-
Save NickCrews/a10156b48d6031d7dcb33dd68efbdc24 to your computer and use it in GitHub Desktop.
ibis backend popularity comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # /// script | |
| # requires-python = ">=3.9" | |
| # dependencies = [ | |
| # "matplotlib>=3.7", | |
| # ] | |
| # /// | |
| """ | |
| Ibis backend popularity proxy. `uv run <this_gist_url>` | |
| For each Ibis backend (and a couple not yet in Ibis), measure the monthly PyPI | |
| downloads of its driver package as a proxy for engine popularity, then plot. | |
| Data source: ClickHouse public "clickpy" PyPI dataset (no auth). | |
| endpoint: https://sql-clickhouse.clickhouse.com/?user=play | |
| Caveats (see chart legend): | |
| * generic/shared drivers (pyodbc, psycopg, psycopg2) overstate the backend | |
| * standalone engines (duckdb, polars, pyspark) are mostly downloaded for | |
| direct use, not via Ibis | |
| * sqlite uses the stdlib -> no driver package to measure (omitted) | |
| * this measures the engine ecosystem, NOT Ibis usage of that backend | |
| """ | |
| from __future__ import annotations | |
| import urllib.request | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| from matplotlib.patches import Patch | |
| # Last COMPLETE calendar month. Bump this when you re-run. | |
| MONTH = "2026-05-01" | |
| CLICKHOUSE = "https://sql-clickhouse.clickhouse.com/?user=play" | |
| # (backend, driver pypi package, category) | |
| # Driver packages come from ibis pyproject.toml [project.optional-dependencies]. | |
| # category: 0 dedicated driver | 1 generic/shared driver (overstated) | |
| # 2 standalone engine (overstated) | 3 not yet in Ibis | |
| # sqlite intentionally excluded: stdlib, no measurable driver package. | |
| BACKENDS = [ | |
| ("snowflake", "snowflake-connector-python", 0), | |
| ("databricks", "databricks-sql-connector", 0), | |
| ("bigquery", "google-cloud-bigquery", 0), | |
| ("postgres", "psycopg", 0), | |
| ("materialize", "psycopg", 1), # shares postgres driver | |
| ("polars", "polars", 2), | |
| ("pyspark", "pyspark", 2), | |
| ("risingwave", "psycopg2", 1), # generic pg driver | |
| ("duckdb", "duckdb", 2), | |
| ("mssql", "pyodbc", 1), # generic ODBC | |
| ("oracle", "oracledb", 0), | |
| ("mysql", "mysqlclient", 0), | |
| ("clickhouse", "clickhouse-connect", 0), | |
| ("athena", "pyathena", 0), | |
| ("trino", "trino", 0), | |
| ("exasol", "pyexasol", 0), | |
| ("impala", "impyla", 0), | |
| ("datafusion", "datafusion", 0), | |
| ("druid", "pydruid", 0), | |
| ("singlestoredb", "singlestoredb", 0), | |
| ("flink", "apache-flink", 0), | |
| # not yet in Ibis: | |
| ("starrocks", "starrocks", 3), | |
| ("gizmosql", "gizmosql", 3), | |
| ] | |
| # annotation suffix for the driver label, per category | |
| SUFFIX = {0: "", 1: "*", 2: "", 3: " [NOT in ibis]"} | |
| def fetch_downloads(packages, month): | |
| """Return {package: total_downloads} for the given month from clickpy. | |
| The monthly table is an aggregating MV with unmerged parts, so we must | |
| SUM(count) GROUP BY project rather than reading rows directly. | |
| """ | |
| pkg_list = ",".join("'" + p + "'" for p in sorted(set(packages))) | |
| sql = ( | |
| "SELECT project, sum(count) AS c " | |
| "FROM pypi.pypi_downloads_per_month " | |
| f"WHERE month = '{month}' AND project IN ({pkg_list}) " | |
| "GROUP BY project FORMAT TSV" | |
| ) | |
| req = urllib.request.Request(CLICKHOUSE, data=sql.encode()) | |
| with urllib.request.urlopen(req, timeout=60) as r: | |
| text = r.read().decode() | |
| out = {} | |
| for line in text.strip().splitlines(): | |
| proj, cnt = line.split("\t") | |
| out[proj] = int(cnt) | |
| return out | |
| def main(): | |
| dl = fetch_downloads([b[1] for b in BACKENDS], MONTH) | |
| rows = [] | |
| for backend, pkg, cat in BACKENDS: | |
| rows.append((backend, pkg, dl.get(pkg, 0), cat)) | |
| rows.sort(key=lambda r: r[2]) | |
| colors = {0: "#2563eb", 1: "#f59e0b", 2: "#8b5cf6", 3: "#16a34a"} | |
| labels = [f"{b} ({pkg}{SUFFIX[c]})" for b, pkg, _, c in rows] | |
| vals = [r[2] for r in rows] | |
| cols = [colors[r[3]] for r in rows] | |
| fig, ax = plt.subplots(figsize=(11, 9)) | |
| ax.barh(range(len(rows)), vals, color=cols, edgecolor="white") | |
| ax.set_yticks(range(len(rows))) | |
| ax.set_yticklabels(labels, fontsize=9) | |
| ax.set_xscale("log") | |
| ax.set_xlabel( | |
| f"PyPI downloads, {MONTH[:7]} (log scale) — source: ClickHouse clickpy dataset" | |
| ) | |
| ax.set_title( | |
| "Ibis backend popularity proxy: monthly PyPI downloads of each " | |
| "backend's driver package", | |
| fontsize=11, | |
| pad=12, | |
| ) | |
| for i, v in enumerate(vals): | |
| txt = f"{v / 1e6:.1f}M" if v >= 1e6 else f"{v / 1e3:.0f}k" | |
| ax.text(v * 1.05, i, txt, va="center", fontsize=8) | |
| ax.set_xlim(2e3, 5e8) | |
| legend = [ | |
| Patch(color="#2563eb", label="Ibis backend, dedicated driver"), | |
| Patch( | |
| color="#f59e0b", label="Ibis backend, GENERIC/SHARED driver (overstated *)" | |
| ), | |
| Patch( | |
| color="#8b5cf6", label="Ibis backend, also a standalone engine (overstated)" | |
| ), | |
| Patch(color="#16a34a", label="Not yet in Ibis"), | |
| ] | |
| ax.legend(handles=legend, loc="lower right", fontsize=8, framealpha=0.95) | |
| ax.text( | |
| 2e3, | |
| -1.8, | |
| "sqlite: omitted — stdlib, no driver package to measure.", | |
| fontsize=8, | |
| style="italic", | |
| color="#555", | |
| ) | |
| plt.tight_layout() | |
| plt.savefig("ibis_backend_popularity.png", dpi=150, bbox_inches="tight") | |
| print("saved ibis_backend_popularity.png") | |
| if __name__ == "__main__": | |
| main() |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Inspired by ibis-project/ibis#12017. This gives, on Jun 17 2026:
