Skip to content

Instantly share code, notes, and snippets.

@ghimiremukesh
Created February 18, 2024 09:10
Show Gist options
  • Save ghimiremukesh/d0632a6c076119f89d199d114d7181d8 to your computer and use it in GitHub Desktop.
Save ghimiremukesh/d0632a6c076119f89d199d114d7181d8 to your computer and use it in GitHub Desktop.
CFR_RPS.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ghimiremukesh/d0632a6c076119f89d199d114d7181d8/cfr_rps.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "a2eI2J40e4O8"
},
"source": [
"### Counterfactual Regret Minimization (CFR) in Perturbed RPS"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FjzfwORee4O9"
},
"outputs": [],
"source": [
"import random\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "g_UmUVQCe4O9"
},
"outputs": [],
"source": [
"# ['R', 'P', 'S'] = 0, 1, 2\n",
"def get_strategy(strategy_sum, regret_sum):\n",
" normalizing_sum = 0\n",
" strategy = regret_sum * (regret_sum > 0)\n",
" normalizing_sum += sum(strategy)\n",
"\n",
" if normalizing_sum > 0:\n",
" strategy = strategy/normalizing_sum\n",
" else:\n",
" strategy = np.ones(3)/3\n",
"\n",
" strategy_sum += strategy\n",
"\n",
" return strategy, strategy_sum\n",
"\n",
"def get_action(strategy):\n",
" actions = [0, 1, 2]\n",
" try:\n",
" action = np.random.choice(actions, p=strategy)\n",
" except:\n",
" action = 0\n",
"\n",
" return action\n",
"\n",
"def train(itertaions, strategy_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2):\n",
" utility_1 = np.zeros(3)\n",
"\n",
" for i in range(iterations):\n",
" strategy_1, strategy_sum_1 = get_strategy(strategy_sum_1, regret_sum_1)\n",
" my_action = get_action(strategy_1)\n",
" strategy_2, strategy_sum_2 = get_strategy(strategy_sum_2, regret_sum_2)\n",
" op_action = get_action(strategy_2)\n",
"\n",
" if op_action == 0: # p2 chooses rock\n",
" utility_1[1] = 1\n",
" utility_1[2] = -2 # -2 for pert.\n",
" elif op_action == 1:\n",
" utility_1[0] = -1\n",
" utility_1[2] = 2 # 2 for pert.\n",
" else:\n",
" utility_1[0] = 2 # 2\n",
" utility_1[1] = -2 # -2\n",
"\n",
"\n",
" regret_1 = utility_1 - utility_1[my_action]\n",
" regret_sum_1 += regret_1\n",
" regret_sum_2 -= regret_1 # zero-sum\n",
"\n",
" return strategy_sum_1, strategy_sum_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Wg2PFoyne4O-"
},
"outputs": [],
"source": [
"regret_sum_1 = np.zeros(3)\n",
"regret_sum_2 = np.zeros(3)\n",
"stratey_sum_1 = np.zeros(3)\n",
"strategy_sum_2 = np.zeros(3)\n",
"\n",
"iterations = 1000000\n",
"\n",
"s1, s2 = train(iterations, stratey_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2)\n",
"s1 /= sum(s1)\n",
"s2 /= sum(s2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XT7WRhose4O-",
"outputId": "6bd2df59-d2db-42f6-eea6-c5aac9c2592b"
},
"outputs": [
{
"data": {
"text/plain": [
"(array([0.23638947, 0.22978905, 0.53382148]),\n",
" array([0.39194694, 0.39866955, 0.2093835 ]))"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1, s2"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment