ghimiremukesh · February 18, 2024 09:10
diff --git a/cfr_rps.ipynb b/cfr_rps.ipynb
 {
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/ghimiremukesh/d0632a6c076119f89d199d114d7181d8/cfr_rps.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "a2eI2J40e4O8"
      },
      "source": [
        "### Counterfactual Regret Minimization (CFR) in Perturbed RPS"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FjzfwORee4O9"
      },
      "outputs": [],
      "source": [
        "import random\n",
        "import numpy as np"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "g_UmUVQCe4O9"
      },
      "outputs": [],
      "source": [
        "# ['R', 'P', 'S'] = 0, 1, 2\n",
        "def get_strategy(strategy_sum, regret_sum):\n",
        "    normalizing_sum = 0\n",
        "    strategy = regret_sum * (regret_sum > 0)\n",
        "    normalizing_sum += sum(strategy)\n",
        "\n",
        "    if normalizing_sum > 0:\n",
        "        strategy = strategy/normalizing_sum\n",
        "    else:\n",
        "        strategy = np.ones(3)/3\n",
        "\n",
        "    strategy_sum += strategy\n",
        "\n",
        "    return strategy, strategy_sum\n",
        "\n",
        "def get_action(strategy):\n",
        "    actions = [0, 1, 2]\n",
        "    try:\n",
        "        action = np.random.choice(actions, p=strategy)\n",
        "    except:\n",
        "        action = 0\n",
        "\n",
        "    return action\n",
        "\n",
        "def train(itertaions, strategy_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2):\n",
        "    utility_1 = np.zeros(3)\n",
        "\n",
        "    for i in range(iterations):\n",
        "        strategy_1, strategy_sum_1 = get_strategy(strategy_sum_1, regret_sum_1)\n",
        "        my_action = get_action(strategy_1)\n",
        "        strategy_2, strategy_sum_2 = get_strategy(strategy_sum_2, regret_sum_2)\n",
        "        op_action = get_action(strategy_2)\n",
        "\n",
        "        if op_action == 0: # p2 chooses rock\n",
        "            utility_1[1] = 1\n",
        "            utility_1[2] = -2 # -2 for pert.\n",
        "        elif op_action == 1:\n",
        "            utility_1[0] = -1\n",
        "            utility_1[2] = 2 # 2 for pert.\n",
        "        else:\n",
        "            utility_1[0] = 2 # 2\n",
        "            utility_1[1] = -2 # -2\n",
        "\n",
        "\n",
        "        regret_1 = utility_1 - utility_1[my_action]\n",
        "        regret_sum_1 += regret_1\n",
        "        regret_sum_2 -= regret_1  # zero-sum\n",
        "\n",
        "    return strategy_sum_1, strategy_sum_2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Wg2PFoyne4O-"
      },
      "outputs": [],
      "source": [
        "regret_sum_1 = np.zeros(3)\n",
        "regret_sum_2 = np.zeros(3)\n",
        "stratey_sum_1 = np.zeros(3)\n",
        "strategy_sum_2 = np.zeros(3)\n",
        "\n",
        "iterations = 1000000\n",
        "\n",
        "s1, s2 = train(iterations, stratey_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2)\n",
        "s1 /= sum(s1)\n",
        "s2 /= sum(s2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "XT7WRhose4O-",
        "outputId": "6bd2df59-d2db-42f6-eea6-c5aac9c2592b"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "(array([0.23638947, 0.22978905, 0.53382148]),\n",
              " array([0.39194694, 0.39866955, 0.2093835 ]))"
            ]
          },
          "execution_count": 147,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "s1, s2"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.13"
    },
    "colab": {
      "provenance": [],
      "include_colab_link": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/ghimiremukesh/d0632a6c076119f89d199d114d7181d8/cfr_rps.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "a2eI2J40e4O8"
	},
	"source": [
	"### Counterfactual Regret Minimization (CFR) in Perturbed RPS"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "FjzfwORee4O9"
	},
	"outputs": [],
	"source": [
	"import random\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "g_UmUVQCe4O9"
	},
	"outputs": [],
	"source": [
	"# ['R', 'P', 'S'] = 0, 1, 2\n",
	"def get_strategy(strategy_sum, regret_sum):\n",
	" normalizing_sum = 0\n",
	" strategy = regret_sum * (regret_sum > 0)\n",
	" normalizing_sum += sum(strategy)\n",
	"\n",
	" if normalizing_sum > 0:\n",
	" strategy = strategy/normalizing_sum\n",
	" else:\n",
	" strategy = np.ones(3)/3\n",
	"\n",
	" strategy_sum += strategy\n",
	"\n",
	" return strategy, strategy_sum\n",
	"\n",
	"def get_action(strategy):\n",
	" actions = [0, 1, 2]\n",
	" try:\n",
	" action = np.random.choice(actions, p=strategy)\n",
	" except:\n",
	" action = 0\n",
	"\n",
	" return action\n",
	"\n",
	"def train(itertaions, strategy_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2):\n",
	" utility_1 = np.zeros(3)\n",
	"\n",
	" for i in range(iterations):\n",
	" strategy_1, strategy_sum_1 = get_strategy(strategy_sum_1, regret_sum_1)\n",
	" my_action = get_action(strategy_1)\n",
	" strategy_2, strategy_sum_2 = get_strategy(strategy_sum_2, regret_sum_2)\n",
	" op_action = get_action(strategy_2)\n",
	"\n",
	" if op_action == 0: # p2 chooses rock\n",
	" utility_1[1] = 1\n",
	" utility_1[2] = -2 # -2 for pert.\n",
	" elif op_action == 1:\n",
	" utility_1[0] = -1\n",
	" utility_1[2] = 2 # 2 for pert.\n",
	" else:\n",
	" utility_1[0] = 2 # 2\n",
	" utility_1[1] = -2 # -2\n",
	"\n",
	"\n",
	" regret_1 = utility_1 - utility_1[my_action]\n",
	" regret_sum_1 += regret_1\n",
	" regret_sum_2 -= regret_1 # zero-sum\n",
	"\n",
	" return strategy_sum_1, strategy_sum_2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "Wg2PFoyne4O-"
	},
	"outputs": [],
	"source": [
	"regret_sum_1 = np.zeros(3)\n",
	"regret_sum_2 = np.zeros(3)\n",
	"stratey_sum_1 = np.zeros(3)\n",
	"strategy_sum_2 = np.zeros(3)\n",
	"\n",
	"iterations = 1000000\n",
	"\n",
	"s1, s2 = train(iterations, stratey_sum_1, regret_sum_1, strategy_sum_2, regret_sum_2)\n",
	"s1 /= sum(s1)\n",
	"s2 /= sum(s2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "XT7WRhose4O-",
	"outputId": "6bd2df59-d2db-42f6-eea6-c5aac9c2592b"
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(array([0.23638947, 0.22978905, 0.53382148]),\n",
	" array([0.39194694, 0.39866955, 0.2093835 ]))"
	]
	},
	"execution_count": 147,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"s1, s2"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.13"
	},
	"colab": {
	"provenance": [],
	"include_colab_link": true
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}