schicks · September 20, 2022 16:00 · schicks · Sep 20, 2022
diff --git a/requirements.txt b/requirements.txt
 polars
 jupyter
 numpy
 pyarrow
 pandas
 altair
diff --git a/voting.ipynb b/voting.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl\n",
    "import numpy as np\n",
    "import altair as alt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(123457789))) #seeded random state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "voters = pl.DataFrame({ # imagine that voters opinions can be mapped into a 2d plane\n",
    "    \"x\": pl.Series(values=rs.normal(size=100, ).tolist()),\n",
    "    \"y\": pl.Series(values=rs.normal(size=100).tolist())\n",
    "})\n",
    "candidates = pl.DataFrame({ # and that they prefer candidates that are close to them\n",
    "    \"x\": pl.Series(values=rs.normal(size=7).tolist()),\n",
    "    \"y\": pl.Series(values=rs.normal(size=7).tolist()),\n",
    "    \"name\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\"]\n",
    "})\n",
    "election = voters.join(candidates, how=\"cross\", suffix=\"_candidate\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        white-space: pre;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-top: 0;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-bottom: 0;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\" >\n",
       "<small>shape: (7, 2)</small>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>\n",
       "name\n",
       "</th>\n",
       "<th>\n",
       "count\n",
       "</th>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "str\n",
       "</td>\n",
       "<td>\n",
       "u32\n",
       "</td>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;b&quot;\n",
       "</td>\n",
       "<td>\n",
       "35\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;c&quot;\n",
       "</td>\n",
       "<td>\n",
       "32\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;f&quot;\n",
       "</td>\n",
       "<td>\n",
       "32\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;g&quot;\n",
       "</td>\n",
       "<td>\n",
       "30\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;e&quot;\n",
       "</td>\n",
       "<td>\n",
       "24\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;a&quot;\n",
       "</td>\n",
       "<td>\n",
       "22\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;d&quot;\n",
       "</td>\n",
       "<td>\n",
       "5\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "shape: (7, 2)\n",
       "┌──────┬───────┐\n",
       "│ name ┆ count │\n",
       "│ ---  ┆ ---   │\n",
       "│ str  ┆ u32   │\n",
       "╞══════╪═══════╡\n",
       "│ b    ┆ 35    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ c    ┆ 32    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ f    ┆ 32    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ g    ┆ 30    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ e    ┆ 24    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ a    ┆ 22    │\n",
       "├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ d    ┆ 5     │\n",
       "└──────┴───────┘"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xdist = (pl.col(\"x\") - pl.col(\"x_candidate\")).pow(2)\n",
    "ydist = (pl.col(\"y\") - pl.col(\"y_candidate\")).pow(2)\n",
    "distance = (xdist + ydist).sqrt().alias(\"distance\")\n",
    "approval_vote = distance <= 1 # simple approval voting strategy; vote for anyone within a set distance o fyou\n",
    "\n",
    "election.lazy().select([ # approval voting can be calculated with a single expression!\n",
    "    pl.col(\"name\").filter(approval_vote),\n",
    "]).groupby(\"name\").agg([pl.count()]).sort(\"count\", reverse=True).collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        white-space: pre;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-top: 0;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-bottom: 0;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\" >\n",
       "<small>shape: (3, 3)</small>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>\n",
       "x\n",
       "</th>\n",
       "<th>\n",
       "y\n",
       "</th>\n",
       "<th>\n",
       "vote\n",
       "</th>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "f64\n",
       "</td>\n",
       "<td>\n",
       "f64\n",
       "</td>\n",
       "<td>\n",
       "list[str]\n",
       "</td>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>\n",
       "0.361286\n",
       "</td>\n",
       "<td>\n",
       "-1.464302\n",
       "</td>\n",
       "<td>\n",
       "[&quot;f&quot;, &quot;b&quot;, ... &quot;a&quot;]\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "1.127261\n",
       "</td>\n",
       "<td>\n",
       "-0.774722\n",
       "</td>\n",
       "<td>\n",
       "[&quot;b&quot;, &quot;f&quot;, ... &quot;d&quot;]\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "0.25419\n",
       "</td>\n",
       "<td>\n",
       "-0.841139\n",
       "</td>\n",
       "<td>\n",
       "[&quot;b&quot;, &quot;f&quot;, ... &quot;d&quot;]\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "shape: (3, 3)\n",
       "┌──────────┬───────────┬─────────────────────┐\n",
       "│ x        ┆ y         ┆ vote                │\n",
       "│ ---      ┆ ---       ┆ ---                 │\n",
       "│ f64      ┆ f64       ┆ list[str]           │\n",
       "╞══════════╪═══════════╪═════════════════════╡\n",
       "│ 0.361286 ┆ -1.464302 ┆ [\"f\", \"b\", ... \"a\"] │\n",
       "├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
       "│ 1.127261 ┆ -0.774722 ┆ [\"b\", \"f\", ... \"d\"] │\n",
       "├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
       "│ 0.25419  ┆ -0.841139 ┆ [\"b\", \"f\", ... \"d\"] │\n",
       "└──────────┴───────────┴─────────────────────┘"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ballots = election.lazy().select([ # ranked methods are more complicated. Generate a frame of ballots to start with.\n",
    "    \"x\",\n",
    "    \"y\",\n",
    "    \"name\",\n",
    "    distance\n",
    "]).groupby([\"x\", \"y\"]).agg([pl.col(\"name\").sort_by(\"distance\").list().alias(\"vote\")]).collect()\n",
    "ballots.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        white-space: pre;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-top: 0;\n",
       "    }\n",
       "\n",
       "    .dataframe td {\n",
       "        padding-bottom: 0;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\" >\n",
       "<small>shape: (7, 2)</small>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>\n",
       "candidate\n",
       "</th>\n",
       "<th>\n",
       "count\n",
       "</th>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "str\n",
       "</td>\n",
       "<td>\n",
       "u32\n",
       "</td>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;b&quot;\n",
       "</td>\n",
       "<td>\n",
       "19\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;c&quot;\n",
       "</td>\n",
       "<td>\n",
       "19\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;f&quot;\n",
       "</td>\n",
       "<td>\n",
       "17\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;a&quot;\n",
       "</td>\n",
       "<td>\n",
       "16\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;e&quot;\n",
       "</td>\n",
       "<td>\n",
       "13\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;g&quot;\n",
       "</td>\n",
       "<td>\n",
       "13\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>\n",
       "&quot;d&quot;\n",
       "</td>\n",
       "<td>\n",
       "3\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "shape: (7, 2)\n",
       "┌───────────┬───────┐\n",
       "│ candidate ┆ count │\n",
       "│ ---       ┆ ---   │\n",
       "│ str       ┆ u32   │\n",
       "╞═══════════╪═══════╡\n",
       "│ b         ┆ 19    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ c         ┆ 19    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ f         ┆ 17    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ a         ┆ 16    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ e         ┆ 13    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ g         ┆ 13    │\n",
       "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
       "│ d         ┆ 3     │\n",
       "└───────────┴───────┘"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first = pl.col(\"vote\").arr.eval(pl.element().first()).explode().alias(\"first\") # selection expression; who's winning right now?\n",
    "count_ballots = lambda df: (df # function to count ballots. We'll need to apply this repeatedly for each round of IRV.\n",
    "    .filter(pl.col(\"vote\").arr.eval(pl.element().len()).explode() > 0)\n",
    "    .select(first.alias(\"candidate\"))\n",
    "    .groupby(\"candidate\")\n",
    "    .agg([pl.count()])\n",
    "    .sort(\"count\"))\n",
    "\n",
    "first_round = count_ballots(ballots) # if we were doing plurality voting, we'd stop here and whoever was on top would win.\n",
    "first_round.reverse()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('c', 21), ('b', 19), ('f', 18), ('a', 16), ('e', 13), ('g', 13)]\n",
      "[('b', 24), ('a', 23), ('c', 21), ('f', 18), ('e', 14)]\n",
      "[('a', 33), ('c', 25), ('b', 24), ('f', 18)]\n",
      "[('b', 37), ('a', 33), ('c', 30)]\n",
      "[('b', 67), ('a', 33)]\n"
     ]
    }
   ],
   "source": [
    "rounds = [first_round]\n",
    "live_ballots = ballots\n",
    "loser = first_round[\"candidate\"].take(0)\n",
    "tail = lambda loser: pl.col(\"vote\").arr.eval(pl.element().filter(pl.element() != loser)).alias(\"vote\")\n",
    "frontrunner_votes = rounds[-1][\"count\"].reverse().take(0)[0] # how many votes does the frontrunner have?\n",
    "while frontrunner_votes < 50: # until the frontrunner has a majority, we keep going\n",
    "    live_ballots = live_ballots.select([\"x\", \"y\", tail(loser)])\n",
    "    n_round = count_ballots(live_ballots)\n",
    "    rounds.append(n_round)\n",
    "    loser = n_round[\"candidate\"].take(0)\n",
    "    frontrunner_votes = n_round[\"count\"].reverse().take(0)[0]\n",
    "    print(list(zip(list(n_round[\"candidate\"].reverse()), list(n_round[\"count\"].reverse()))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vscode/.local/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
      "  for col_name, dtype in df.dtypes.iteritems():\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<div id=\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//[email protected]?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-92c2ea8da9f039451aa5417bfe50b5fa\"}, \"mark\": \"bar\", \"encoding\": {\"facet\": {\"field\": \"round\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"candidate\", \"type\": \"nominal\"}, \"y\": {\"field\": \"count\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-92c2ea8da9f039451aa5417bfe50b5fa\": [{\"candidate\": \"d\", \"count\": 3, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 1}, {\"candidate\": \"e\", \"count\": 13, \"round\": 1}, {\"candidate\": \"a\", \"count\": 16, \"round\": 1}, {\"candidate\": \"f\", \"count\": 17, \"round\": 1}, {\"candidate\": \"c\", \"count\": 19, \"round\": 1}, {\"candidate\": \"b\", \"count\": 19, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 2}, {\"candidate\": \"e\", \"count\": 13, \"round\": 2}, {\"candidate\": \"a\", \"count\": 16, \"round\": 2}, {\"candidate\": \"f\", \"count\": 18, \"round\": 2}, {\"candidate\": \"b\", \"count\": 19, \"round\": 2}, {\"candidate\": \"c\", \"count\": 21, \"round\": 2}, {\"candidate\": \"e\", \"count\": 14, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 3}, {\"candidate\": \"c\", \"count\": 21, \"round\": 3}, {\"candidate\": \"a\", \"count\": 23, \"round\": 3}, {\"candidate\": \"b\", \"count\": 24, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 4}, {\"candidate\": \"b\", \"count\": 24, \"round\": 4}, {\"candidate\": \"c\", \"count\": 25, \"round\": 4}, {\"candidate\": \"a\", \"count\": 33, \"round\": 4}, {\"candidate\": \"c\", \"count\": 30, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 5}, {\"candidate\": \"b\", \"count\": 37, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 6}, {\"candidate\": \"b\", \"count\": 67, \"round\": 6}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_rounds = pl.concat([round.select([pl.all(), pl.lit(i + 1).alias(\"round\")]) for i, round in enumerate(rounds)]).to_pandas()\n",
    "alt.Chart(all_rounds).mark_bar().encode(\n",
    "    facet='round',\n",
    "    x='candidate',\n",
    "    y='count'\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"import polars as pl\n",
	"import numpy as np\n",
	"import altair as alt"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(123457789))) #seeded random state"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"voters = pl.DataFrame({ # imagine that voters opinions can be mapped into a 2d plane\n",
	" \"x\": pl.Series(values=rs.normal(size=100, ).tolist()),\n",
	" \"y\": pl.Series(values=rs.normal(size=100).tolist())\n",
	"})\n",
	"candidates = pl.DataFrame({ # and that they prefer candidates that are close to them\n",
	" \"x\": pl.Series(values=rs.normal(size=7).tolist()),\n",
	" \"y\": pl.Series(values=rs.normal(size=7).tolist()),\n",
	" \"name\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\"]\n",
	"})\n",
	"election = voters.join(candidates, how=\"cross\", suffix=\"_candidate\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" white-space: pre;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-top: 0;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-bottom: 0;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\" >\n",
	"<small>shape: (7, 2)</small>\n",
	"<thead>\n",
	"<tr>\n",
	"<th>\n",
	"name\n",
	"</th>\n",
	"<th>\n",
	"count\n",
	"</th>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	"str\n",
	"</td>\n",
	"<td>\n",
	"u32\n",
	"</td>\n",
	"</tr>\n",
	"</thead>\n",
	"<tbody>\n",
	"<tr>\n",
	"<td>\n",
	""b"\n",
	"</td>\n",
	"<td>\n",
	"35\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""c"\n",
	"</td>\n",
	"<td>\n",
	"32\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""f"\n",
	"</td>\n",
	"<td>\n",
	"32\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""g"\n",
	"</td>\n",
	"<td>\n",
	"30\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""e"\n",
	"</td>\n",
	"<td>\n",
	"24\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""a"\n",
	"</td>\n",
	"<td>\n",
	"22\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""d"\n",
	"</td>\n",
	"<td>\n",
	"5\n",
	"</td>\n",
	"</tr>\n",
	"</tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"shape: (7, 2)\n",
	"┌──────┬───────┐\n",
	"│ name ┆ count │\n",
	"│ --- ┆ --- │\n",
	"│ str ┆ u32 │\n",
	"╞══════╪═══════╡\n",
	"│ b ┆ 35 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ c ┆ 32 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ f ┆ 32 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ g ┆ 30 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ e ┆ 24 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ a ┆ 22 │\n",
	"├╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ d ┆ 5 │\n",
	"└──────┴───────┘"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"xdist = (pl.col(\"x\") - pl.col(\"x_candidate\")).pow(2)\n",
	"ydist = (pl.col(\"y\") - pl.col(\"y_candidate\")).pow(2)\n",
	"distance = (xdist + ydist).sqrt().alias(\"distance\")\n",
	"approval_vote = distance <= 1 # simple approval voting strategy; vote for anyone within a set distance o fyou\n",
	"\n",
	"election.lazy().select([ # approval voting can be calculated with a single expression!\n",
	" pl.col(\"name\").filter(approval_vote),\n",
	"]).groupby(\"name\").agg([pl.count()]).sort(\"count\", reverse=True).collect()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" white-space: pre;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-top: 0;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-bottom: 0;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\" >\n",
	"<small>shape: (3, 3)</small>\n",
	"<thead>\n",
	"<tr>\n",
	"<th>\n",
	"x\n",
	"</th>\n",
	"<th>\n",
	"y\n",
	"</th>\n",
	"<th>\n",
	"vote\n",
	"</th>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	"f64\n",
	"</td>\n",
	"<td>\n",
	"f64\n",
	"</td>\n",
	"<td>\n",
	"list[str]\n",
	"</td>\n",
	"</tr>\n",
	"</thead>\n",
	"<tbody>\n",
	"<tr>\n",
	"<td>\n",
	"0.361286\n",
	"</td>\n",
	"<td>\n",
	"-1.464302\n",
	"</td>\n",
	"<td>\n",
	"["f", "b", ... "a"]\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	"1.127261\n",
	"</td>\n",
	"<td>\n",
	"-0.774722\n",
	"</td>\n",
	"<td>\n",
	"["b", "f", ... "d"]\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	"0.25419\n",
	"</td>\n",
	"<td>\n",
	"-0.841139\n",
	"</td>\n",
	"<td>\n",
	"["b", "f", ... "d"]\n",
	"</td>\n",
	"</tr>\n",
	"</tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"shape: (3, 3)\n",
	"┌──────────┬───────────┬─────────────────────┐\n",
	"│ x ┆ y ┆ vote │\n",
	"│ --- ┆ --- ┆ --- │\n",
	"│ f64 ┆ f64 ┆ list[str] │\n",
	"╞══════════╪═══════════╪═════════════════════╡\n",
	"│ 0.361286 ┆ -1.464302 ┆ [\"f\", \"b\", ... \"a\"] │\n",
	"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
	"│ 1.127261 ┆ -0.774722 ┆ [\"b\", \"f\", ... \"d\"] │\n",
	"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
	"│ 0.25419 ┆ -0.841139 ┆ [\"b\", \"f\", ... \"d\"] │\n",
	"└──────────┴───────────┴─────────────────────┘"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ballots = election.lazy().select([ # ranked methods are more complicated. Generate a frame of ballots to start with.\n",
	" \"x\",\n",
	" \"y\",\n",
	" \"name\",\n",
	" distance\n",
	"]).groupby([\"x\", \"y\"]).agg([pl.col(\"name\").sort_by(\"distance\").list().alias(\"vote\")]).collect()\n",
	"ballots.head(3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" white-space: pre;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-top: 0;\n",
	" }\n",
	"\n",
	" .dataframe td {\n",
	" padding-bottom: 0;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\" >\n",
	"<small>shape: (7, 2)</small>\n",
	"<thead>\n",
	"<tr>\n",
	"<th>\n",
	"candidate\n",
	"</th>\n",
	"<th>\n",
	"count\n",
	"</th>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	"str\n",
	"</td>\n",
	"<td>\n",
	"u32\n",
	"</td>\n",
	"</tr>\n",
	"</thead>\n",
	"<tbody>\n",
	"<tr>\n",
	"<td>\n",
	""b"\n",
	"</td>\n",
	"<td>\n",
	"19\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""c"\n",
	"</td>\n",
	"<td>\n",
	"19\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""f"\n",
	"</td>\n",
	"<td>\n",
	"17\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""a"\n",
	"</td>\n",
	"<td>\n",
	"16\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""e"\n",
	"</td>\n",
	"<td>\n",
	"13\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""g"\n",
	"</td>\n",
	"<td>\n",
	"13\n",
	"</td>\n",
	"</tr>\n",
	"<tr>\n",
	"<td>\n",
	""d"\n",
	"</td>\n",
	"<td>\n",
	"3\n",
	"</td>\n",
	"</tr>\n",
	"</tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"shape: (7, 2)\n",
	"┌───────────┬───────┐\n",
	"│ candidate ┆ count │\n",
	"│ --- ┆ --- │\n",
	"│ str ┆ u32 │\n",
	"╞═══════════╪═══════╡\n",
	"│ b ┆ 19 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ c ┆ 19 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ f ┆ 17 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ a ┆ 16 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ e ┆ 13 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ g ┆ 13 │\n",
	"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n",
	"│ d ┆ 3 │\n",
	"└───────────┴───────┘"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"first = pl.col(\"vote\").arr.eval(pl.element().first()).explode().alias(\"first\") # selection expression; who's winning right now?\n",
	"count_ballots = lambda df: (df # function to count ballots. We'll need to apply this repeatedly for each round of IRV.\n",
	" .filter(pl.col(\"vote\").arr.eval(pl.element().len()).explode() > 0)\n",
	" .select(first.alias(\"candidate\"))\n",
	" .groupby(\"candidate\")\n",
	" .agg([pl.count()])\n",
	" .sort(\"count\"))\n",
	"\n",
	"first_round = count_ballots(ballots) # if we were doing plurality voting, we'd stop here and whoever was on top would win.\n",
	"first_round.reverse()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[('c', 21), ('b', 19), ('f', 18), ('a', 16), ('e', 13), ('g', 13)]\n",
	"[('b', 24), ('a', 23), ('c', 21), ('f', 18), ('e', 14)]\n",
	"[('a', 33), ('c', 25), ('b', 24), ('f', 18)]\n",
	"[('b', 37), ('a', 33), ('c', 30)]\n",
	"[('b', 67), ('a', 33)]\n"
	]
	}
	],
	"source": [
	"rounds = [first_round]\n",
	"live_ballots = ballots\n",
	"loser = first_round[\"candidate\"].take(0)\n",
	"tail = lambda loser: pl.col(\"vote\").arr.eval(pl.element().filter(pl.element() != loser)).alias(\"vote\")\n",
	"frontrunner_votes = rounds[-1][\"count\"].reverse().take(0)[0] # how many votes does the frontrunner have?\n",
	"while frontrunner_votes < 50: # until the frontrunner has a majority, we keep going\n",
	" live_ballots = live_ballots.select([\"x\", \"y\", tail(loser)])\n",
	" n_round = count_ballots(live_ballots)\n",
	" rounds.append(n_round)\n",
	" loser = n_round[\"candidate\"].take(0)\n",
	" frontrunner_votes = n_round[\"count\"].reverse().take(0)[0]\n",
	" print(list(zip(list(n_round[\"candidate\"].reverse()), list(n_round[\"count\"].reverse()))))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/home/vscode/.local/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
	" for col_name, dtype in df.dtypes.iteritems():\n"
	]
	},
	{
	"data": {
	"text/html": [
	"\n",
	"<div id=\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\"></div>\n",
	"<script type=\"text/javascript\">\n",
	" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
	" (function(spec, embedOpt){\n",
	" let outputDiv = document.currentScript.previousElementSibling;\n",
	" if (outputDiv.id !== \"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\") {\n",
	" outputDiv = document.getElementById(\"altair-viz-ec65f47c12ff4347bf5c2a7a810f7662\");\n",
	" }\n",
	" const paths = {\n",
	" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
	" \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
	" \"vega-lite\": \"https://cdn.jsdelivr.net/npm//[email protected]?noext\",\n",
	" \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
	" };\n",
	"\n",
	" function maybeLoadScript(lib, version) {\n",
	" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
	" return (VEGA_DEBUG[key] == version) ?\n",
	" Promise.resolve(paths[lib]) :\n",
	" new Promise(function(resolve, reject) {\n",
	" var s = document.createElement('script');\n",
	" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
	" s.async = true;\n",
	" s.onload = () => {\n",
	" VEGA_DEBUG[key] = version;\n",
	" return resolve(paths[lib]);\n",
	" };\n",
	" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
	" s.src = paths[lib];\n",
	" });\n",
	" }\n",
	"\n",
	" function showError(err) {\n",
	" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
	" throw err;\n",
	" }\n",
	"\n",
	" function displayChart(vegaEmbed) {\n",
	" vegaEmbed(outputDiv, spec, embedOpt)\n",
	" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
	" }\n",
	"\n",
	" if(typeof define === \"function\" && define.amd) {\n",
	" requirejs.config({paths});\n",
	" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
	" } else {\n",
	" maybeLoadScript(\"vega\", \"5\")\n",
	" .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
	" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
	" .catch(showError)\n",
	" .then(() => displayChart(vegaEmbed));\n",
	" }\n",
	" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-92c2ea8da9f039451aa5417bfe50b5fa\"}, \"mark\": \"bar\", \"encoding\": {\"facet\": {\"field\": \"round\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"candidate\", \"type\": \"nominal\"}, \"y\": {\"field\": \"count\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-92c2ea8da9f039451aa5417bfe50b5fa\": [{\"candidate\": \"d\", \"count\": 3, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 1}, {\"candidate\": \"e\", \"count\": 13, \"round\": 1}, {\"candidate\": \"a\", \"count\": 16, \"round\": 1}, {\"candidate\": \"f\", \"count\": 17, \"round\": 1}, {\"candidate\": \"c\", \"count\": 19, \"round\": 1}, {\"candidate\": \"b\", \"count\": 19, \"round\": 1}, {\"candidate\": \"g\", \"count\": 13, \"round\": 2}, {\"candidate\": \"e\", \"count\": 13, \"round\": 2}, {\"candidate\": \"a\", \"count\": 16, \"round\": 2}, {\"candidate\": \"f\", \"count\": 18, \"round\": 2}, {\"candidate\": \"b\", \"count\": 19, \"round\": 2}, {\"candidate\": \"c\", \"count\": 21, \"round\": 2}, {\"candidate\": \"e\", \"count\": 14, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 3}, {\"candidate\": \"c\", \"count\": 21, \"round\": 3}, {\"candidate\": \"a\", \"count\": 23, \"round\": 3}, {\"candidate\": \"b\", \"count\": 24, \"round\": 3}, {\"candidate\": \"f\", \"count\": 18, \"round\": 4}, {\"candidate\": \"b\", \"count\": 24, \"round\": 4}, {\"candidate\": \"c\", \"count\": 25, \"round\": 4}, {\"candidate\": \"a\", \"count\": 33, \"round\": 4}, {\"candidate\": \"c\", \"count\": 30, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 5}, {\"candidate\": \"b\", \"count\": 37, \"round\": 5}, {\"candidate\": \"a\", \"count\": 33, \"round\": 6}, {\"candidate\": \"b\", \"count\": 67, \"round\": 6}]}}, {\"mode\": \"vega-lite\"});\n",
	"</script>"
	],
	"text/plain": [
	"alt.Chart(...)"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"all_rounds = pl.concat([round.select([pl.all(), pl.lit(i + 1).alias(\"round\")]) for i, round in enumerate(rounds)]).to_pandas()\n",
	"alt.Chart(all_rounds).mark_bar().encode(\n",
	" facet='round',\n",
	" x='candidate',\n",
	" y='count'\n",
	")"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3.8.10 64-bit",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.6"
	},
	"orig_nbformat": 4,
	"vscode": {
	"interpreter": {
	"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}