Skip to content

Instantly share code, notes, and snippets.

@schicks
Last active September 20, 2022 16:00
Show Gist options
  • Save schicks/5ea4085acf6312e0281ec48a103fc1d5 to your computer and use it in GitHub Desktop.
Save schicks/5ea4085acf6312e0281ec48a103fc1d5 to your computer and use it in GitHub Desktop.
Voting methods with polars dataframes
polars
jupyter
numpy
pyarrow
pandas
altair
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"import numpy as np\n",
"import altair as alt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(123457789))) #seeded random state"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"voters = pl.DataFrame({ # imagine that voters opinions can be mapped into a 2d plane\n",
" \"x\": pl.Series(values=rs.normal(size=100, ).tolist()),\n",
" \"y\": pl.Series(values=rs.normal(size=100).tolist())\n",
"})\n",
"candidates = pl.DataFrame({ # and that they prefer candidates that are close to them\n",
" \"x\": pl.Series(values=rs.normal(size=7).tolist()),\n",
" \"y\": pl.Series(values=rs.normal(size=7).tolist()),\n",
" \"name\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\"]\n",
"})\n",
"election = voters.join(candidates, how=\"cross\", suffix=\"_candidate\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe td {\n",
" white-space: pre;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-top: 0;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-bottom: 0;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\" >\n",
"<small>shape: (7, 2)</small>\n",
"<thead>\n",
"<tr>\n",
"<th>\n",
"name\n",
"</th>\n",
"<th>\n",
"count\n",
"</th>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"str\n",
"</td>\n",
"<td>\n",
"u32\n",
"</td>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td>\n",
"&quot;b&quot;\n",
"</td>\n",
"<td>\n",
"35\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;c&quot;\n",
"</td>\n",
"<td>\n",
"32\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;f&quot;\n",
"</td>\n",
"<td>\n",
"32\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;g&quot;\n",
"</td>\n",
"<td>\n",
"30\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;e&quot;\n",
"</td>\n",
"<td>\n",
"24\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;a&quot;\n",
"</td>\n",
"<td>\n",
"22\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;d&quot;\n",
"</td>\n",
"<td>\n",
"5\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"shape: (7, 2)\n",
"┌──────┬───────┐\n",
"│ name ┆ count │\n",
"│ --- ┆ --- │\n",
"│ str ┆ u32 │\n",
"╞══════╪═══════╡\n",
"│ b ┆ 35 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ c ┆ 32 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ f ┆ 32 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ g ┆ 30 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ e ┆ 24 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ a ┆ 22 │\n",
"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ d ┆ 5 │\n",
"└──────┴───────┘"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xdist = (pl.col(\"x\") - pl.col(\"x_candidate\")).pow(2)\n",
"ydist = (pl.col(\"y\") - pl.col(\"y_candidate\")).pow(2)\n",
"distance = (xdist + ydist).sqrt().alias(\"distance\")\n",
"approval_vote = distance <= 1 # simple approval voting strategy; vote for anyone within a set distance o fyou\n",
"\n",
"election.lazy().select([ # approval voting can be calculated with a single expression!\n",
" pl.col(\"name\").filter(approval_vote),\n",
"]).groupby(\"name\").agg([pl.count()]).sort(\"count\", reverse=True).collect()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe td {\n",
" white-space: pre;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-top: 0;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-bottom: 0;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\" >\n",
"<small>shape: (3, 3)</small>\n",
"<thead>\n",
"<tr>\n",
"<th>\n",
"x\n",
"</th>\n",
"<th>\n",
"y\n",
"</th>\n",
"<th>\n",
"vote\n",
"</th>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"f64\n",
"</td>\n",
"<td>\n",
"f64\n",
"</td>\n",
"<td>\n",
"list[str]\n",
"</td>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td>\n",
"0.361286\n",
"</td>\n",
"<td>\n",
"-1.464302\n",
"</td>\n",
"<td>\n",
"[&quot;f&quot;, &quot;b&quot;, ... &quot;a&quot;]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"1.127261\n",
"</td>\n",
"<td>\n",
"-0.774722\n",
"</td>\n",
"<td>\n",
"[&quot;b&quot;, &quot;f&quot;, ... &quot;d&quot;]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"0.25419\n",
"</td>\n",
"<td>\n",
"-0.841139\n",
"</td>\n",
"<td>\n",
"[&quot;b&quot;, &quot;f&quot;, ... &quot;d&quot;]\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"shape: (3, 3)\n",
"┌──────────┬───────────┬─────────────────────┐\n",
"│ x ┆ y ┆ vote │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ list[str] │\n",
"╞══════════╪═══════════╪═════════════════════╡\n",
"│ 0.361286 ┆ -1.464302 ┆ [\"f\", \"b\", ... \"a\"] │\n",
"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
"│ 1.127261 ┆ -0.774722 ┆ [\"b\", \"f\", ... \"d\"] │\n",
"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
"│ 0.25419 ┆ -0.841139 ┆ [\"b\", \"f\", ... \"d\"] │\n",
"└──────────┴───────────┴─────────────────────┘"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ballots = election.lazy().select([ # ranked methods are more complicated. Generate a frame of ballots to start with.\n",
" \"x\",\n",
" \"y\",\n",
" \"name\",\n",
" distance\n",
"]).groupby([\"x\", \"y\"]).agg([pl.col(\"name\").sort_by(\"distance\").list().alias(\"vote\")]).collect()\n",
"ballots.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe td {\n",
" white-space: pre;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-top: 0;\n",
" }\n",
"\n",
" .dataframe td {\n",
" padding-bottom: 0;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\" >\n",
"<small>shape: (7, 2)</small>\n",
"<thead>\n",
"<tr>\n",
"<th>\n",
"candidate\n",
"</th>\n",
"<th>\n",
"count\n",
"</th>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"str\n",
"</td>\n",
"<td>\n",
"u32\n",
"</td>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td>\n",
"&quot;b&quot;\n",
"</td>\n",
"<td>\n",
"19\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;c&quot;\n",
"</td>\n",
"<td>\n",
"19\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;f&quot;\n",
"</td>\n",
"<td>\n",
"17\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;a&quot;\n",
"</td>\n",
"<td>\n",
"16\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;e&quot;\n",
"</td>\n",
"<td>\n",
"13\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;g&quot;\n",
"</td>\n",
"<td>\n",
"13\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td>\n",
"&quot;d&quot;\n",
"</td>\n",
"<td>\n",
"3\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"shape: (7, 2)\n",
"┌───────────┬───────┐\n",
"│ candidate ┆ count │\n",
"│ --- ┆ --- │\n",
"│ str ┆ u32 │\n",
"╞═══════════╪═══════╡\n",
"│ b ┆ 19 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ c ┆ 19 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ f ┆ 17 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ a ┆ 16 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ e ┆ 13 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ g ┆ 13 │\n",
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
"│ d ┆ 3 │\n",
"└───────────┴───────┘"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"first = pl.col(\"vote\").arr.eval(pl.element().first()).explode().alias(\"first\") # selection expression; who's winning right now?\n",
"count_ballots = lambda df: (df # function to count ballots. We'll need to apply this repeatedly for each round of IRV.\n",
" .filter(pl.col(\"vote\").arr.eval(pl.element().len()).explode() > 0)\n",
" .select(first.alias(\"candidate\"))\n",
" .groupby(\"candidate\")\n",
" .agg([pl.count()])\n",
" .sort(\"count\"))\n",
"\n",
"first_round = count_ballots(ballots) # if we were doing plurality voting, we'd stop here and whoever was on top would win.\n",
"first_round.reverse()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('c', 21), ('b', 19), ('f', 18), ('a', 16), ('e', 13), ('g', 13)]\n",
"[('b', 24), ('a', 23), ('c', 21), ('f', 18), ('e', 14)]\n",
"[('a', 33), ('c', 25), ('b', 24), ('f', 18)]\n",
"[('b', 37), ('a', 33), ('c', 30)]\n",
"[('b', 67), ('a', 33)]\n"
]
}
],
"source": [
"rounds = [first_round]\n",
"live_ballots = ballots\n",
"loser = first_round[\"candidate\"].take(0)\n",
"tail = lambda loser: pl.col(\"vote\").arr.eval(pl.element().filter(pl.element() != loser)).alias(\"vote\")\n",
"frontrunner_votes = rounds[-1][\"count\"].reverse().take(0)[0] # how many votes does the frontrunner have?\n",
"while frontrunner_votes < 50: # until the frontrunner has a majority, we keep going\n",
" live_ballots = live_ballots.select([\"x\", \"y\", tail(loser)])\n",
" n_round = count_ballots(live_ballots)\n",
" rounds.append(n_round)\n",
" loser = n_round[\"candidate\"].take(0)\n",
" frontrunner_votes = n_round[\"count\"].reverse().take(0)[0]\n",
" print(list(zip(list(n_round[\"candidate\"].reverse()), list(n_round[\"count\"].reverse()))))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/vscode/.local/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
" for col_name, dtype in df.dtypes.iteritems():\n"
]
},
{
"data": {
"text/html": [
"\n",
"<div id=\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\") {\n",
" outputDiv = document.getElementById(\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm//[email protected]?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-92c2ea8da9f039451aa5417bfe50b5fa\"}, \"mark\": \"bar\", \"encoding\": {\"facet\": {\"field\": \"round\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"candidate\", \"type\": \"nominal\"}, \"y\": {\"field\": \"count\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-92c2ea8da9f039451aa5417bfe50b5fa\": [{\"candidate\": \"d\", \"count\": 3, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 1}, {\"candidate\": \"e\", \"count\": 13, \"round\": 1}, {\"candidate\": \"a\", \"count\": 16, \"round\": 1}, {\"candidate\": \"f\", \"count\": 17, \"round\": 1}, {\"candidate\": \"c\", \"count\": 19, \"round\": 1}, {\"candidate\": \"b\", \"count\": 19, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 2}, {\"candidate\": \"e\", \"count\": 13, \"round\": 2}, {\"candidate\": \"a\", \"count\": 16, \"round\": 2}, {\"candidate\": \"f\", \"count\": 18, \"round\": 2}, {\"candidate\": \"b\", \"count\": 19, \"round\": 2}, {\"candidate\": \"c\", \"count\": 21, \"round\": 2}, {\"candidate\": \"e\", \"count\": 14, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 3}, {\"candidate\": \"c\", \"count\": 21, \"round\": 3}, {\"candidate\": \"a\", \"count\": 23, \"round\": 3}, {\"candidate\": \"b\", \"count\": 24, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 4}, {\"candidate\": \"b\", \"count\": 24, \"round\": 4}, {\"candidate\": \"c\", \"count\": 25, \"round\": 4}, {\"candidate\": \"a\", \"count\": 33, \"round\": 4}, {\"candidate\": \"c\", \"count\": 30, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 5}, {\"candidate\": \"b\", \"count\": 37, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 6}, {\"candidate\": \"b\", \"count\": 67, \"round\": 6}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_rounds = pl.concat([round.select([pl.all(), pl.lit(i + 1).alias(\"round\")]) for i, round in enumerate(rounds)]).to_pandas()\n",
"alt.Chart(all_rounds).mark_bar().encode(\n",
" facet='round',\n",
" x='candidate',\n",
" y='count'\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@schicks
Copy link
Author

schicks commented Sep 20, 2022

Sad that altair charts don't seem to work in gists, but the random seed should mean that this is runnable locally if you are curious.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment