yhilpisch · March 7, 2025 23:38
diff --git a/00_rl4f_odsc.md b/00_rl4f_odsc.md
diff --git a/01_rl4f_odsc.ipynb b/01_rl4f_odsc.ipynb
diff --git a/02_rl4f_odsc.ipynb b/02_rl4f_odsc.ipynb
diff --git a/03_rl4f_odsc.ipynb b/03_rl4f_odsc.ipynb
diff --git a/04_rl4f_odsc.ipynb b/04_rl4f_odsc.ipynb
diff --git a/05_rl4f_odsc.ipynb b/05_rl4f_odsc.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "475819a4-e148-4616-b1cb-44b659aeb08a",
   "metadata": {},
   "source": [
    "<img src=\"http://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e",
   "metadata": {},
   "source": [
    "# Reinforcement Learning for Finance\n",
    "\n",
    "**Chapter 08 &mdash; Asset Allocation**\n",
    "\n",
    "&copy; Dr. Yves J. Hilpisch\n",
    "\n",
    "<a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "adcfb8e0-6497-4d2e-a261-6762373fd693",
   "metadata": {},
   "source": [
    "## Capital Market Line"
   ]
  },
  {
   "cell_type": "raw",
   "id": "bcc20fa7-c4ce-44b7-b3ce-080856f592f9",
   "metadata": {},
   "source": [
    "# tag::01[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b74284e7-9506-4793-bc99-016775313b22",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import math\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import stats\n",
    "from pylab import plt, mpl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e80ce705-6c55-46d9-9199-549d8ea689f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.style.use('seaborn-v0_8')\n",
    "mpl.rcParams['figure.dpi'] = 300\n",
    "mpl.rcParams['savefig.dpi'] = 300\n",
    "mpl.rcParams['font.family'] = 'serif'\n",
    "np.set_printoptions(suppress=True)\n",
    "pd.set_option('display.float_format', lambda x: '%.3f' % x)"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ff01b860-4149-4169-b1c4-ea731c67afbf",
   "metadata": {},
   "source": [
    "# end::01[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "fe5d336f-b40d-4151-a444-a8f6d550b588",
   "metadata": {},
   "source": [
    "# tag::02[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee1c1641-9894-48dd-90f0-215c1d276c17",
   "metadata": {},
   "outputs": [],
   "source": [
    "r = 0.025  # <1>\n",
    "beta = 0.2  # <2>\n",
    "sigma = 0.375  # <3>\n",
    "mu = r + beta * sigma  # <4>\n",
    "mu  # <4>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7785eae1-d7cb-4ebd-a9b2-67f73703b667",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "vol = np.linspace(0, 0.5)  # <5>\n",
    "ret = r + beta * vol  # <5>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdf3e816-6483-4694-8566-495bd7154511",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Capital market line with the riskless asset and the market portfolio.\n",
    "# The LaTeX tick labels are raw strings: backslash-s and backslash-m are\n",
    "# not valid Python escapes and raise a SyntaxWarning on Python 3.12+.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(vol, ret, 'b', label='capital market line (CML)')\n",
    "ax.plot(0, r, 'g^', label='riskless asset')\n",
    "ax.plot(sigma, mu, 'ro', label='market portfolio')\n",
    "ax.set_xlabel('volatility/risk')\n",
    "ax.set_ylabel('expected return')\n",
    "ax.set_xticks((0, sigma))\n",
    "ax.set_xticklabels((0, r'$\\sigma$',))\n",
    "ax.set_yticks((0, r, mu))\n",
    "ax.set_yticklabels((0, '$r$', r'$\\mu$'))\n",
    "ax.set_ylim(0, 0.15)\n",
    "ax.legend();\n",
    "# plt.savefig('../figures/figure_08_01');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "b2c6bacd-4fb4-4b10-8a3f-e752d6c285d5",
   "metadata": {},
   "source": [
    "# end::02[]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36861754-df58-40cf-a690-512291df5731",
   "metadata": {},
   "source": [
    "## Investing Environment"
   ]
  },
  {
   "cell_type": "raw",
   "id": "1cd6d772-fd28-4c8d-b616-55251a8db30d",
   "metadata": {},
   "source": [
    "# tag::03[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0585829d-c4a2-494f-a5b9-ba3a0c6d5b1c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class observation_space:\n",
    "    \"\"\"Bare-bones observation space that only records its shape.\"\"\"\n",
    "\n",
    "    def __init__(self, n):\n",
    "        # One-dimensional space with ``n`` entries.\n",
    "        shape = (n,)\n",
    "        self.shape = shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "754cfe0d-8fff-4d2d-b96a-d6c611de6226",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class action_space:\n",
    "    \"\"\"Action space whose samples come from ``random.random()``.\"\"\"\n",
    "\n",
    "    def __init__(self, n):\n",
    "        self.n = n\n",
    "\n",
    "    def seed(self, seed):\n",
    "        \"\"\"Seed the ``random`` module generator used by ``sample``.\"\"\"\n",
    "        random.seed(seed)\n",
    "\n",
    "    def sample(self):\n",
    "        \"\"\"Return one float drawn from the interval [0, 1).\"\"\"\n",
    "        draw = random.random()  # <1>\n",
    "        return draw"
   ]
  },
  {
   "cell_type": "raw",
   "id": "9ab9f6f6-bfa6-4736-8eea-819f4ffce652",
   "metadata": {},
   "source": [
    "# end::03[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "d834537a-d04f-4772-a53d-f14e921f9d39",
   "metadata": {},
   "source": [
    "# tag::04[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "622556ea-c7fc-4418-862e-c0403506f175",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Investing:\n",
    "    \"\"\"Investing environment (constructor part).\n",
    "\n",
    "    ``r_``, ``mu_`` and ``sigma_`` are stored as passed in, and\n",
    "    ``_generate_data()`` is called once during construction.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, S0, T, r_, mu_, sigma_, steps, amount):\n",
    "        # simulation set-up\n",
    "        self.initial_value = S0\n",
    "        self.maturity = T\n",
    "        self.steps = steps\n",
    "        self.short_rate_ = r_  # <1>\n",
    "        self.index_drift_ = mu_  # <1>\n",
    "        self.volatility_ = sigma_  # <1>\n",
    "        # portfolio bookkeeping\n",
    "        self.initial_balance = amount  # <2>\n",
    "        self.portfolio_value = amount  # <3>\n",
    "        self.portfolio_value_new = amount  # <4>\n",
    "        # Gym-style space descriptions\n",
    "        self.observation_space = observation_space(4)\n",
    "        self.osn = self.observation_space.shape[0]\n",
    "        self.action_space = action_space(1)\n",
    "        # empty results log and episode counter, then the first data set\n",
    "        self.portfolios = pd.DataFrame()\n",
    "        self.episode = 0\n",
    "        self._generate_data()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "80cc319c-6eb6-4115-b352-8c392570286f",
   "metadata": {},
   "source": [
    "# end::04[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "f9a4c607-f5d0-4fd4-95f5-fa17ccd2fe53",
   "metadata": {},
   "source": [
    "# tag::05[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bae8dc41-21a9-46e6-abc4-5b18a83a2839",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    def _generate_data(self):\n",
    "        \"\"\"Simulate fresh price paths and store them in ``self.data``.\n",
    "\n",
    "        One value each is picked at random from the short-rate, drift and\n",
    "        volatility lists. Column ``Xt`` is a path with log-normal\n",
    "        increments; column ``Yt`` grows continuously at the short rate.\n",
    "        \"\"\"\n",
    "        self.short_rate = random.choice(self.short_rate_)  # <1>\n",
    "        self.index_drift = random.choice(self.index_drift_)  # <1>\n",
    "        self.volatility = random.choice(self.volatility_)  # <1>\n",
    "        self.dt = self.maturity / self.steps\n",
    "        # Per-step drift and diffusion scale are constant along the path.\n",
    "        drift = (self.index_drift - self.volatility ** 2 / 2) * self.dt\n",
    "        diffusion = self.volatility * math.sqrt(self.dt)\n",
    "        path = [self.initial_value]\n",
    "        for _ in range(self.steps):\n",
    "            shock = random.gauss(0, 1)\n",
    "            path.append(\n",
    "                path[-1] * math.exp(drift + diffusion * shock)\n",
    "            )  # <2>\n",
    "        self.data = pd.DataFrame(path, columns=['Xt'])\n",
    "        periods = np.arange(len(self.data))\n",
    "        self.data['Yt'] = self.initial_value * np.exp(\n",
    "            self.short_rate * periods * self.dt)  # <3>"
   ]
  },
  {
   "cell_type": "raw",
   "id": "fecb868b-073a-4d44-870b-b7223fb159d6",
   "metadata": {},
   "source": [
    "# end::05[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "39e024df-2ef8-4880-af81-b6b3faf2dbf0",
   "metadata": {},
   "source": [
    "# tag::06[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "303c0a10-95af-42e1-8f97-0c9ecc91d4d7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    def _get_state(self):\n",
    "        \"\"\"Return ``(state, info)`` for the current bar.\n",
    "\n",
    "        ``state`` holds the ``Xt`` and ``Yt`` prices at ``self.bar``\n",
    "        followed by ``self.xt`` and ``self.yt``; ``info`` is an empty dict.\n",
    "        \"\"\"\n",
    "        row = self.bar\n",
    "        prices = [self.data[col].iloc[row] for col in ('Xt', 'Yt')]\n",
    "        return np.array(prices + [self.xt, self.yt]), {}\n",
    "\n",
    "    def seed(self, seed=None):\n",
    "        \"\"\"Seed the ``random`` module; ``None`` leaves it untouched.\"\"\"\n",
    "        if seed is None:\n",
    "            return\n",
    "        random.seed(seed)\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Start a new episode and return the initial ``(state, info)``.\"\"\"\n",
    "        self.bar = 0\n",
    "        self.xt = self.yt = 0\n",
    "        self.treward = 0\n",
    "        start_value = self.initial_balance\n",
    "        self.portfolio_value = start_value\n",
    "        self.portfolio_value_new = start_value\n",
    "        self.episode += 1\n",
    "        self._generate_data()\n",
    "        self.state, info = self._get_state()\n",
    "        return self.state, info"
   ]
  },
  {
   "cell_type": "raw",
   "id": "761d2793-e808-487e-9e9c-1c0c551a7a90",
   "metadata": {},
   "source": [
    "# end::06[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "3856a1db-3153-4479-9474-41d7cc53e0ec",
   "metadata": {},
   "source": [
    "# tag::07[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ceb2abe-a786-4fcf-a570-24e7099cfae8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    def add_results(self, pl):\n",
    "        \"\"\"Append one row describing the current step to ``self.portfolios``.\n",
    "\n",
    "        ``pl`` is the absolute profit/loss of the step. ``p&l[%]`` is the\n",
    "        same figure relative to ``portfolio_value_new``, scaled by 100 so\n",
    "        that the stored number is a percentage, as the label states.\n",
    "        \"\"\"\n",
    "        df = pd.DataFrame({'e': self.episode, 'xt': self.xt,\n",
    "                   'yt': self.yt, 'pv': self.portfolio_value,\n",
    "                   'pv_new': self.portfolio_value_new, 'p&l[$]': pl,\n",
    "                   'p&l[%]': pl / self.portfolio_value_new * 100,\n",
    "                   'Xt': self.state[0],  'Yt': self.state[1],\n",
    "                   'Xt_new': self.new_state[0],\n",
    "                   'Yt_new': self.new_state[1],\n",
    "                   'r': self.short_rate, 'mu': self.index_drift,\n",
    "                   'sigma': self.volatility}, index=[0])\n",
    "        self.portfolios = pd.concat((self.portfolios, df),\n",
    "                                    ignore_index=True)\n",
    "\n",
    "    def step(self, action):\n",
    "        \"\"\"Advance one bar, apply ``action`` and return the result tuple.\n",
    "\n",
    "        ``action`` becomes the new ``self.xt`` and ``1 - action`` the new\n",
    "        ``self.yt``. From the second bar on, the portfolio is revalued\n",
    "        with the previous positions first and the reward is the absolute\n",
    "        change in portfolio value; on the first bar the reward is 0.\n",
    "\n",
    "        Returns ``(state, reward, done, False, {})`` where ``done`` is\n",
    "        true once the last row of ``self.data`` has been reached.\n",
    "        \"\"\"\n",
    "        self.bar += 1\n",
    "        self.new_state, _ = self._get_state()\n",
    "        if self.bar == 1:  # <1>\n",
    "            self.xt = action # <2>\n",
    "            self.yt = (1 - action) # <3>\n",
    "            pl = 0.\n",
    "            reward = 0.\n",
    "            self.add_results(pl)\n",
    "        else:\n",
    "            self.portfolio_value_new = (\n",
    "                self.xt * self.portfolio_value *\n",
    "                self.new_state[0] / self.state[0] +\n",
    "                self.yt * self.portfolio_value *\n",
    "                self.new_state[1] / self.state[1])  # <4>\n",
    "            pl = self.portfolio_value_new - self.portfolio_value  # <5>\n",
    "            self.xt = action # <6>\n",
    "            self.yt = (1 - action) # <7>\n",
    "            self.add_results(pl)  # <8>\n",
    "            reward = pl  # <9>\n",
    "            self.portfolio_value = self.portfolio_value_new  # <10>\n",
    "        done = self.bar == len(self.data) - 1\n",
    "        self.state = self.new_state\n",
    "        return self.state, reward, done, False, {}"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ec195e72-d855-415c-9505-6b0ec1cff8a5",
   "metadata": {},
   "source": [
    "# end::07[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "b2ff2a12-17f4-44ed-a641-1e93ea5af96c",
   "metadata": {},
   "source": [
    "# tag::08[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35fc17a3-cdac-4fff-960b-b2a5e900bb2e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "S0 = 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31dfa81a-3ff7-4d93-ad75-d60326a300eb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing = Investing(S0=S0, T=1.0, r_=[0.05], mu_=[0.3],\n",
    "              sigma_=[0.35], steps=252, amount=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "130fa362-6876-4749-88fa-077160c3de51",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing.seed(750)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eceaf5ef-48f2-437f-99a6-1e8fcadbef6a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing._generate_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da77a7be-7ebc-4f00-8aa8-e07d4029a7a5",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing.data.plot(style=['g--', 'b:'], lw=1.0)\n",
    "plt.xlabel('time step')\n",
    "plt.ylabel('price');\n",
    "# plt.savefig('../figures/figure_08_02.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "5818dd3c-2aeb-4096-bd47-06644ff1374a",
   "metadata": {},
   "source": [
    "# end::08[]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7a5bc13-9c60-4f35-93c6-ff4091375089",
   "metadata": {},
   "source": [
    "## Investing Agent"
   ]
  },
  {
   "cell_type": "raw",
   "id": "cfe6d2a2-2c9e-40c9-b28a-985c9c203be7",
   "metadata": {},
   "source": [
    "# tag::09[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3686b411-5f39-487c-91bb-bc794320e590",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3325e79-9775-45e5-9796-cea322034eea",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for _ in range(investing.steps - 1):\n",
    "    investing.step(investing.action_space.sample())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2cf19e9-f371-44f6-9a1e-ccbc126f44f7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing.portfolios.head().round(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24ef9a67-2628-4d0e-a911-57702ba9a239",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing.portfolios[['Xt', 'Yt', 'pv']].plot(\n",
    "    title='PORTFOLIO VALUE | RANDOM AGENT',\n",
    "    style=['g--', 'b:', 'r-'], lw=1)\n",
    "plt.xlabel('time step')\n",
    "plt.ylabel('value');\n",
    "# plt.savefig('../figures/figure_08_03.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "44166346-6bb7-4cef-800d-f6a66c30d651",
   "metadata": {},
   "source": [
    "# end::09[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a52562b8-176f-4207-bf6a-cf381d77fa0f",
   "metadata": {},
   "source": [
    "# tag::10[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81052058-331d-41af-99f6-803514a933fc",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from dqlagent import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "493701f2-b72d-45f0-87da-f2f1fb98130b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "opt = keras.optimizers.legacy.Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bd8fb5d-39f5-4e83-9118-c78846e545a0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class InvestingAgent(DQLAgent):\n",
    "    def _create_model(self, hu, lr):\n",
    "        \"\"\"Build and compile the network stored in ``self.model``.\n",
    "\n",
    "        Two ReLU hidden layers with ``hu`` units each feed a single\n",
    "        linear output unit. The model is compiled with MSE loss and the\n",
    "        optimizer ``opt`` at learning rate ``lr``.\n",
    "        \"\"\"\n",
    "        stack = (\n",
    "            Dense(hu, input_dim=self.n_features, activation='relu'),\n",
    "            Dense(hu, activation='relu'),\n",
    "            Dense(1, activation='linear'),  # <1>\n",
    "        )\n",
    "        self.model = Sequential()\n",
    "        for layer in stack:\n",
    "            self.model.add(layer)\n",
    "        optimizer = opt(learning_rate=lr)\n",
    "        self.model.compile(loss='mse', optimizer=optimizer)"
   ]
  },
  {
   "cell_type": "raw",
   "id": "8a2edc83-3bfa-4154-815f-7d5bb81c60a9",
   "metadata": {},
   "source": [
    "# end::10[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "e72ea993-84b6-4757-a281-820c260e78c5",
   "metadata": {},
   "source": [
    "# tag::11[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47f55cb0-9513-4a20-aad6-7cba3fc0be7a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from scipy.optimize import minimize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6085e91b-cc2e-4c99-8d05-205892fb0272",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class InvestingAgent(InvestingAgent):\n",
    "    def opt_action(self, state):\n",
    "        \"\"\"Return the allocation in [0, 1] with the highest predicted value.\n",
    "\n",
    "        A candidate ``x`` is written into column ``self.xp`` and ``1 - x``\n",
    "        into column ``self.yp`` of a copy of ``state``; the model\n",
    "        prediction for that row is maximized with a bounded Nelder-Mead\n",
    "        search started at 0.5.\n",
    "        \"\"\"\n",
    "        bnds = [(0, 1)]  # <1>\n",
    "        def f(state, x):  # <2>\n",
    "            s = state.copy()\n",
    "            # minimize() passes a 1-element array; take the scalar out\n",
    "            # explicitly, because assigning a size-1 array to a single\n",
    "            # element is deprecated since NumPy 1.25.\n",
    "            x = float(np.ravel(x)[0])\n",
    "            s[0, self.xp] = x  # <3>\n",
    "            s[0, self.yp] = 1 - x  # <4>\n",
    "            return self.model.predict(s)[0, 0]  # <5>\n",
    "        action = minimize(lambda x: -f(state, x), 0.5,\n",
    "                        bounds=bnds, method='Nelder-Mead',\n",
    "                        )['x'][0]  # <6>\n",
    "        return action\n",
    "\n",
    "    def act(self, state):\n",
    "        \"\"\"Pick an action epsilon-greedily.\n",
    "\n",
    "        With probability ``self.epsilon`` a random sample from the\n",
    "        environment's action space is returned, otherwise the result of\n",
    "        ``opt_action(state)``.\n",
    "        \"\"\"\n",
    "        if random.random() <= self.epsilon:\n",
    "            return self.env.action_space.sample()\n",
    "        action = self.opt_action(state)  # <7>\n",
    "        return action"
   ]
  },
  {
   "cell_type": "raw",
   "id": "146d1691-62c0-42c3-a5f1-3281e4472c2b",
   "metadata": {},
   "source": [
    "# end::11[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "5e9d40af-9c1c-4213-88b6-3988a75d25c3",
   "metadata": {},
   "source": [
    "# tag::12[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f14d3677-9499-4c71-973e-3b9008028dcc",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class InvestingAgent(InvestingAgent):\n",
    "    def replay(self):\n",
    "        \"\"\"Fit the model on a random batch drawn from ``self.memory``.\n",
    "\n",
    "        For non-terminal transitions the target is the reward plus the\n",
    "        discounted model prediction for the next state, evaluated with\n",
    "        the allocation that ``opt_action`` returns for it. Afterwards\n",
    "        ``epsilon`` is decayed while it is above ``epsilon_min``.\n",
    "        \"\"\"\n",
    "        samples = random.sample(self.memory, self.batch_size)\n",
    "        for state, action, next_state, reward, done in samples:\n",
    "            target = reward\n",
    "            if not done:\n",
    "                ns = next_state.copy()\n",
    "                best = self.opt_action(ns)  # <1>\n",
    "                ns[0, self.xp] = best  # <2>\n",
    "                ns[0, self.yp] = 1 - best  # <3>\n",
    "                future = self.model.predict(ns)[0, 0]\n",
    "                target = reward + self.gamma * future  # <4>\n",
    "            self.model.fit(state, np.array([target]),\n",
    "                           epochs=1, verbose=False)\n",
    "        if self.epsilon > self.epsilon_min:\n",
    "            self.epsilon = self.epsilon * self.epsilon_decay"
   ]
  },
  {
   "cell_type": "raw",
   "id": "bcb34fb7-bcdc-465b-a26c-082f4a1364ff",
   "metadata": {},
   "source": [
    "# end::12[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "6593422d-214c-4ccf-9145-e92013e44a96",
   "metadata": {},
   "source": [
    "# tag::13[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5c90b10-fa10-41c4-ad62-555ea76b7c63",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class InvestingAgent(InvestingAgent):\n",
    "    def test(self, episodes, verbose=True):\n",
    "        \"\"\"Run ``episodes`` episodes using ``opt_action`` for every step.\n",
    "\n",
    "        Each episode resets the environment and steps until it reports\n",
    "        ``done`` (at most ``len(self.env.data)`` steps). With ``verbose``\n",
    "        the episode number and total reward are printed on one line.\n",
    "        \"\"\"\n",
    "        for e in range(1, episodes + 1):\n",
    "            state, info = self.env.reset()\n",
    "            state = self._reshape(state)\n",
    "            treward = 0\n",
    "            max_steps = len(self.env.data)\n",
    "            for step in range(max_steps):\n",
    "                action = self.opt_action(state)\n",
    "                state, reward, done, trunc, info = self.env.step(action)\n",
    "                state = self._reshape(state)\n",
    "                treward += reward\n",
    "                if not done:\n",
    "                    continue\n",
    "                templ = f'episode={e} | '\n",
    "                templ += f'total reward={treward:4.2f}'\n",
    "                if verbose:\n",
    "                    print(templ, end='\\r')\n",
    "                break"
   ]
  },
  {
   "cell_type": "raw",
   "id": "d769d9be-afcb-48fa-bc27-1c404e2fcdf5",
   "metadata": {},
   "source": [
    "# end::13[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "99d3eef9-3054-4fd8-ae78-944a31702b78",
   "metadata": {},
   "source": [
    "# tag::14[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8473e2f-f8e8-4a47-93e3-faf10c00ee9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_seeds(seed=500):\n",
    "    \"\"\"Seed Python's ``random``, NumPy and TensorFlow with ``seed``.\"\"\"\n",
    "    seeders = (random.seed, np.random.seed, tf.random.set_seed)\n",
    "    for seeder in seeders:\n",
    "        seeder(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4bea541-1657-4264-9690-d98b02be7c2e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "set_seeds()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "309333d7-bdb2-4dbd-917b-5ac793213585",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "investing = Investing(S0=S0, T=1.0, r_=[0, 0.025, 0.05],\n",
    "              mu_=[0.05, 0.1, 0.15],\n",
    "              sigma_=[0.1, 0.2, 0.3], steps=252, amount=1) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f442546f-b5da-4081-ab10-69c62132ae4d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent = InvestingAgent('2FS', feature=None, n_features=4,\n",
    "                     env=investing, hu=128, lr=0.00025)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb1c0f47-8ce8-4767-84fd-376ee6a5ab96",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.xp = 2  # <1>\n",
    "agent.yp = 3  # <2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "196cd97b-abe1-4944-b63f-7da29f97c11b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "episodes = 64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "118377b4-cb91-4b33-b7e7-2851b549d48d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%time agent.learn(episodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5da9d2b-70e0-4178-afc6-13c677766fef",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent.epsilon"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a57d45e2-1970-4a58-82f0-7aa56f92803c",
   "metadata": {},
   "source": [
    "# end::14[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "c61f9a93-6fcf-48f3-9fd6-013a55cdf6a6",
   "metadata": {},
   "source": [
    "# tag::15[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6230a337-c7ab-45f7-8507-3a8cb72dce9b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent.env.portfolios = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8d4652b-9e71-4a0a-b38b-26181f53a4c8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%time agent.test(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "151903f4-d5e7-4f9b-afc9-c737d2809b71",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "n = max(agent.env.portfolios['e'])  # <1>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb84d944-8933-4046-bca6-737f411393f4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "res = agent.env.portfolios[agent.env.portfolios['e'] == n]\n",
    "res.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb3926e0-d82a-4624-b49b-f7324905020d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "p = res.iloc[0][['r', 'mu', 'sigma']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b78af9a-a0a9-454e-bbdf-ffde5435f1a2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "t = f\"r={p['r']} | mu={p['mu']} | sigma={p['sigma']}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6bb1b0b-aded-4bd0-a787-18aeeca0b83f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "res[['Xt', 'Yt', 'pv']].plot(\n",
    "    title='PORTFOLIO VALUE | ' + t,\n",
    "    style=['g--', 'b:', 'r-'], lw=1)\n",
    "plt.xlabel('time step')\n",
    "plt.ylabel('value');\n",
    "# plt.savefig('../figures/figure_08_04.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "6ec789f9-02e6-49e1-90f1-45c877958652",
   "metadata": {},
   "source": [
    "# end::15[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a1583359-3523-4692-bb48-4aff076d399e",
   "metadata": {},
   "source": [
    "# tag::16[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85ef4f89-1a10-4ecc-af5b-f4750d1559b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "rets = res[['Xt', 'Yt', 'pv']].pct_change(\n",
    "    ).mean() / agent.env.dt  # <1>\n",
    "rets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99e927ca-cef3-4e0c-a9be-81a805f69936",
   "metadata": {},
   "outputs": [],
   "source": [
    "stds = res[['Xt', 'Yt', 'pv']].pct_change(\n",
    "    ).std() / math.sqrt(agent.env.dt)  # <2>\n",
    "stds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2dcaf29-b3e2-417d-b723-7509b8e8168e",
   "metadata": {},
   "outputs": [],
   "source": [
    "rets[['Xt', 'pv']] / stds[['Xt', 'pv']]  # <3>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fecb6a4d-e6f0-4013-b0ee-390d48ee571a",
   "metadata": {},
   "outputs": [],
   "source": [
    "res['xt'].mean()  # <4>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c997138c-f7a8-4072-ba53-076105dd2416",
   "metadata": {},
   "outputs": [],
   "source": [
    "res['xt'].std()  # <5>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d53e260c-fdce-42d3-8058-f155cd418f54",
   "metadata": {},
   "outputs": [],
   "source": [
    "res['xt'].plot(title='RISKY ALLOCATION | ' + t,\n",
    "               lw=1.0, c='b')\n",
    "plt.ylim(res['xt'].min() - 0.1, res['xt'].max() + 0.1)\n",
    "plt.xlabel('time step');\n",
    "# plt.savefig('../figures/figure_08_05.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "196578c8-62ea-4def-84eb-b41bcd003b5d",
   "metadata": {},
   "source": [
    "# end::16[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "8dd2957b-0d6d-4e40-bfd5-4a0f8318ee38",
   "metadata": {},
   "source": [
    "# tag::17[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9237f945-6618-4314-b477-d34bc2389c5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios.groupby('mu')['xt'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ff529ed-da46-4eb4-b97d-f62f6abb4f2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios.groupby('sigma')['xt'].describe()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "5dab67cd-80df-423c-ba31-04a2123e0ad0",
   "metadata": {},
   "source": [
    "# end::17[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ee8ba3ec-f3fd-47da-9700-b4b49f25417d",
   "metadata": {},
   "source": [
    "# tag::18[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8c12127-857d-4459-af42-388866b0caec",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios.groupby('mu')['pv_new'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58bfed3a-46d3-4fa1-a753-90c8a143ff82",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios.groupby('sigma')['pv_new'].describe()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "17afb6ce-5e0c-4bfa-851b-89891b6385ee",
   "metadata": {},
   "source": [
    "# end::18[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "4bb17062-9655-49f2-8c01-9da1a3d30864",
   "metadata": {},
   "source": [
    "# tag::19[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "179d5611-ad66-4bc8-96a7-81ef6c7e1443",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "n = max(agent.env.portfolios['e'])  # <1>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c57cc688-6511-40af-bd3d-a1cee4dc02b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "res = agent.env.portfolios[agent.env.portfolios['e'] == n]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e965dcc4-82a9-4f1c-b19b-59dde5e3048a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "p = res.iloc[0][['r', 'mu', 'sigma']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "370451e0-7b09-4d8b-85c7-13b5e91f38c4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "t = f\"r={p['r']} | mu={p['mu']} | sigma={p['sigma']}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2edc590-2370-4d68-9d9d-f7e28d0e3455",
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = res[['Xt', 'Yt', 'pv', 'xt']].plot(\n",
    "    title='PORTFOLIO VALUE | ' + t,\n",
    "    style=['g--', 'b:', 'r-', 'm-.'], lw=1,\n",
    "    secondary_y='xt'\n",
    ")\n",
    "# plt.savefig('../figures/figure_08_06.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "6203fcfe-9a40-4832-b981-8f9945b2320b",
   "metadata": {},
   "source": [
    "# end::19[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "703bb96c-a86c-47d7-9109-a3b7a9c138c7",
   "metadata": {},
   "source": [
    "xxxxx"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d304115-a4dd-4a5b-967e-2f4a3ddb07b6",
   "metadata": {},
   "source": [
    "## Two Assets"
   ]
  },
  {
   "cell_type": "raw",
   "id": "1d283e02-311a-4fda-8faa-dbb078c9795a",
   "metadata": {},
   "source": [
    "# tag::20[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc0b66c9-03fd-4ee5-be97-5259ec0985fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    \"\"\"Investing environment driven by two columns of a downloaded CSV.\"\"\"\n",
    "\n",
    "    def __init__(self, asset_one='.SPX', asset_two='.VIX',\n",
    "                 steps=252, amount=1):\n",
    "        # data selection\n",
    "        self.asset_one = asset_one\n",
    "        self.asset_two = asset_two\n",
    "        self.steps = steps\n",
    "        # portfolio bookkeeping\n",
    "        self.initial_balance = amount\n",
    "        self.portfolio_value = amount\n",
    "        self.portfolio_value_new = amount\n",
    "        # NOTE(review): the observation space is declared with 4 entries\n",
    "        # here; confirm this matches the state returned by _get_state.\n",
    "        self.observation_space = observation_space(4)\n",
    "        self.osn = self.observation_space.shape[0]\n",
    "        self.action_space = action_space(1)\n",
    "        # the CSV is fetched on the first _generate_data() call only\n",
    "        self.retrieved = False\n",
    "        self._generate_data()\n",
    "        self.portfolios = pd.DataFrame()\n",
    "        self.episode = 0\n",
    "\n",
    "    def _generate_data(self):\n",
    "        \"\"\"Select a random window of ``steps`` rows, normalized to start at 1.\n",
    "\n",
    "        The raw CSV is read once and kept in ``self.raw``; later calls\n",
    "        only draw a new window from it.\n",
    "        \"\"\"\n",
    "        if not self.retrieved:\n",
    "            url = 'https://certificate.tpq.io/rl4finance.csv'  # <1>\n",
    "            self.raw = pd.read_csv(url, index_col=0,\n",
    "                                   parse_dates=True).dropna()  # <1>\n",
    "            self.retrieved = True\n",
    "        prices = pd.DataFrame()\n",
    "        prices['Xt'] = self.raw[self.asset_one]\n",
    "        prices['Yt'] = self.raw[self.asset_two]\n",
    "        end = random.randint(self.steps, len(prices))  # <2>\n",
    "        window = prices.iloc[end - self.steps:end]  # <3>\n",
    "        self.data = window / window.iloc[0]  # <4>"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ab919ff2-27b9-4f95-a414-4312b5ecb025",
   "metadata": {},
   "source": [
    "# end::20[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "c43b6efb-7886-4cd8-8d22-161c9671d615",
   "metadata": {},
   "source": [
    "# tag::21[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe69c425-e014-4590-bf66-4c9ec0ceba76",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    def _get_state(self):\n",
    "        '''Returns the state for the current bar and an empty dict.\n",
    "\n",
    "        The state consists of both prices, their difference and the\n",
    "        two allocations ``xt`` and ``yt``; the index label of the\n",
    "        bar is stored in ``self.date``.\n",
    "        '''\n",
    "        Xt = self.data['Xt'].iloc[self.bar]\n",
    "        Yt = self.data['Yt'].iloc[self.bar]\n",
    "        self.date = self.data.index[self.bar]  # <1>\n",
    "        return np.array([Xt, Yt, Xt - Yt, self.xt, self.yt]), {}  # <2>\n",
    "\n",
    "    def add_results(self, pl):\n",
    "        '''Appends one row of step results to ``self.portfolios``.\n",
    "\n",
    "        pl: absolute profit or loss of the step\n",
    "        '''\n",
    "        df = pd.DataFrame({\n",
    "            'e': self.episode, 'date': self.date,  # <3>\n",
    "            'xt': self.xt, 'yt': self.yt,\n",
    "            'pv': self.portfolio_value,\n",
    "            'pv_new': self.portfolio_value_new, 'p&l[$]': pl,\n",
    "            # simple return: relative to the value before the move\n",
    "            'p&l[%]': pl / self.portfolio_value * 100,\n",
    "            'Xt': self.state[0], 'Yt': self.state[1],\n",
    "            'Xt_new': self.new_state[0],\n",
    "            'Yt_new': self.new_state[1],\n",
    "        }, index=[0])\n",
    "        self.portfolios = pd.concat((self.portfolios, df),\n",
    "                                    ignore_index=True)"
   ]
  },
  {
   "cell_type": "raw",
   "id": "4faff16c-f4ff-4e80-af94-9507a8451365",
   "metadata": {},
   "source": [
    "# end::21[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ca81e3db-3315-47bf-9b59-377269c2c288",
   "metadata": {},
   "source": [
    "# tag::22[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4bcbe3c-a50e-4649-a8f1-726ead4f4f1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Investing(Investing):\n",
    "    def step(self, action):\n",
    "        '''Advances the environment by one bar.\n",
    "\n",
    "        action: share of the portfolio put into the first asset;\n",
    "            the remainder goes into the second asset\n",
    "\n",
    "        Returns the tuple (state, reward, done, truncated, info);\n",
    "        truncated is always False and info is always empty.\n",
    "        '''\n",
    "        self.bar += 1\n",
    "        self.new_state, info = self._get_state()\n",
    "        if self.bar == 1:\n",
    "            # first bar: only the initial allocation is set\n",
    "            self.xt = action\n",
    "            self.yt = (1 - action)\n",
    "            pl = 0.\n",
    "            reward = 0.\n",
    "            self.add_results(pl)\n",
    "        else:\n",
    "            self.portfolio_value_new = (\n",
    "                self.xt * self.portfolio_value *\n",
    "                self.new_state[0] / self.state[0] +\n",
    "                self.yt * self.portfolio_value *\n",
    "                self.new_state[1] / self.state[1])\n",
    "            pl = self.portfolio_value_new - self.portfolio_value\n",
    "            pen = (self.xt - action) ** 2  # <1>\n",
    "            self.xt = action\n",
    "            self.yt = (1 - action)\n",
    "            self.add_results(pl)\n",
    "            pnl = self.portfolios['p&l[%]']\n",
    "            ret = pnl.iloc[-1] / 100 * 252  # <2>\n",
    "            # std of the last 20 rows only; avoids a rolling pass\n",
    "            # over the whole history on every step\n",
    "            vol = pnl.iloc[-20:].std() * math.sqrt(252)  # <3>\n",
    "            # NOTE(review): ret is a decimal while vol stays in\n",
    "            # percent points; kept because pen is scaled against it\n",
    "            # zero or undefined volatility must not yield inf/NaN\n",
    "            sharpe = ret / vol if vol > 0 else 0.  # <4>\n",
    "            reward = sharpe - pen  # <5>\n",
    "            self.portfolio_value = self.portfolio_value_new\n",
    "        done = self.bar == len(self.data) - 1\n",
    "        self.state = self.new_state\n",
    "        return self.state, reward, done, False, {}"
   ]
  },
  {
   "cell_type": "raw",
   "id": "169373d8-bf0a-45f0-8f72-986b3a323f7f",
   "metadata": {},
   "source": [
    "# end::22[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a7a56d26-6ed5-4ec8-b535-2df0440e0de6",
   "metadata": {},
   "source": [
    "# tag::23[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcac62b5-c184-4a47-a4fc-fe7a1b551b6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "days = 2 * 252"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5aee535b-ef6d-487e-87c2-ddbe1cfb9322",
   "metadata": {},
   "outputs": [],
   "source": [
    "investing = Investing(steps=days)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b485de0b-e13a-4037-abc7-6dfc154ad646",
   "metadata": {},
   "outputs": [],
   "source": [
    "investing.data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6553101-ad40-47f8-8100-5ab143a96fd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "investing.data.corr()  # <1>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "299564ea-e440-48d0-943e-e5d60a684c79",
   "metadata": {},
   "outputs": [],
   "source": [
    "investing.data.plot(secondary_y='Yt',\n",
    "                    style=['b', 'g--'], lw=1);\n",
    "# plt.savefig('../figures/figure_08_07.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "bccadced-4841-47cc-a73e-adcfcd7f541b",
   "metadata": {},
   "source": [
    "# end::23[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "8822ef35-364e-4525-9a6c-d368446f40e5",
   "metadata": {},
   "source": [
    "# tag::24[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc0e6d20-82fc-4715-aa9b-2105ca649282",
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21030e1f-e322-46b3-8ef1-383e65b710a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "investing = Investing(steps=days)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a81d98e2-c171-4960-a1df-73967a3b8d03",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent = InvestingAgent('2AC', feature=None, n_features=5,\n",
    "                     env=investing, hu=48, lr=0.0005)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1f6bbff-4f0f-4bdd-8a39-b62d4087d751",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.xp = 3  # <1>\n",
    "agent.yp = 4  # <2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "824d6c19-8142-4e49-89b4-aebd302c3115",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "episodes = 250"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2435dc88-3e67-4f9a-a9c0-e018b9394475",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%time agent.learn(episodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dbd842b-b4f6-4ee6-bcc5-730bdcd246be",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.epsilon"
   ]
  },
  {
   "cell_type": "raw",
   "id": "4f12bada-99b5-4e3c-a9a2-216a89a92a2a",
   "metadata": {},
   "source": [
    "# end::24[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "eaa16df1-ffa3-4db9-8951-d1290a1c73a9",
   "metadata": {},
   "source": [
    "# tag::25[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90859de3-ca33-4d78-8f13-fd87f8ed19f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ba5cbc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "%time agent.test(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3785aeb2-67a5-4e73-a32b-1d2aeeddee82",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios['xt'].describe()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "9efaf486-c271-47a9-b840-1c1c14e80bf7",
   "metadata": {},
   "source": [
    "# end::25[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "acf27b4e-5271-4f4a-bb38-167dd186b45f",
   "metadata": {},
   "source": [
    "# tag::26[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d23876a8-9eae-42c4-896c-8372b03cee01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# pick the fourth-to-last recorded episode for a closer look\n",
    "n = agent.env.portfolios['e'].max() - 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5997ea73-8c7b-4e2c-8ade-5e5d77b554c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "res = agent.env.portfolios[\n",
    "        agent.env.portfolios['e'] == n].set_index('date')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95b78b84-43fa-4996-be72-cb4b676f9e0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "res['xt'].plot(lw=1, c='b')\n",
    "plt.ylim(res['xt'].min() - 0.1, res['xt'].max() + 0.1)\n",
    "plt.ylabel('allocation (asset 1)');\n",
    "# plt.savefig('../figures/figure_08_08.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "10511320-44b3-4f6d-8817-b4efc6b3dec2",
   "metadata": {},
   "source": [
    "# end::26[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "74058686-1c21-4636-923b-abb9aafac50d",
   "metadata": {},
   "source": [
    "# tag::27[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c5e9de2-e82e-4a54-91b1-2b5e2b111706",
   "metadata": {},
   "outputs": [],
   "source": [
    "res[['Xt', 'Yt', 'pv']].iloc[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "788e7c6b-5d9f-4891-ab00-02d32e17379d",
   "metadata": {},
   "outputs": [],
   "source": [
    "r = np.log(res[['Xt', 'Yt', 'pv']] /\n",
    "           res[['Xt', 'Yt', 'pv']].shift(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87594fb2-c4c4-41bd-a81f-2b456eecb95e",
   "metadata": {},
   "outputs": [],
   "source": [
    "rets = np.exp(r.mean() * 252) - 1\n",
    "rets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4313a90-1c13-4a6e-9600-1933f2049e64",
   "metadata": {},
   "outputs": [],
   "source": [
    "stds = r.std() * math.sqrt(252)\n",
    "stds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec14b2d5-86cf-4095-96dd-d2b757dd400f",
   "metadata": {},
   "outputs": [],
   "source": [
    "rets / stds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55814e23-bdab-4776-be41-134d76619f47",
   "metadata": {},
   "outputs": [],
   "source": [
    "res[['Xt', 'Yt', 'pv']].plot(\n",
    "    title='PORTFOLIO VALUE',\n",
    "    style=['g--', 'b:', 'r-'],\n",
    "    lw=1, grid=True)\n",
    "plt.ylabel('value');\n",
    "# plt.savefig('../figures/figure_08_09.png');"
   ]
  },
  {
   "cell_type": "raw",
   "id": "5cce3f0b-13b9-4281-87ff-baa473c906c7",
   "metadata": {},
   "source": [
    "# end::27[]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "49b2755b-5a29-47fb-b418-31540cc69fcf",
   "metadata": {},
   "source": [
    "# tag::28[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5e8b746-3587-4263-974a-2b124f24ea46",
   "metadata": {},
   "outputs": [],
   "source": [
    "values = agent.env.portfolios.groupby('e')[\n",
    "        ['Xt', 'Yt', 'pv_new']].last()\n",
    "values.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "913ee35a-7b05-4b85-86cb-b0768626b001",
   "metadata": {},
   "outputs": [],
   "source": [
    "values.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88c8233e-c2ae-471f-8045-640ed0a34ee2",
   "metadata": {},
   "outputs": [],
   "source": [
    "((values['pv_new'] > values['Xt']) &\n",
    " (values['pv_new'] > values['Yt'])).value_counts()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "735c5ca3-49c1-4b80-a48c-d67e7bff0951",
   "metadata": {},
   "source": [
    "# end::28[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0ff32cb-0858-4f7b-acf5-c72e4f53dd53",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.env.portfolios.groupby(['e'])['pv_new'].last()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c3207ecc-7632-4b56-b369-f5fe60ef4e27",
   "metadata": {},
   "source": [
    "<img src=\"http://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>\n",
    "\n",
    "<a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
diff --git a/dqlagent.py b/dqlagent.py
 #
 # Deep Q-Learning Agent
 #
 # (c) Dr. Yves J. Hilpisch
 # Reinforcement Learning for Finance
 #

 import os
 import random
 import warnings
 import numpy as np
 import tensorflow as tf
 from tensorflow import keras
 from collections import deque
 from keras.layers import Dense, Flatten
 from keras.models import Sequential

 warnings.simplefilter('ignore')
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


 from tensorflow.python.framework.ops import disable_eager_execution
 disable_eager_execution()

 opt = keras.optimizers.legacy.Adam


 class DQLAgent:
     """Deep Q-learning agent with epsilon-greedy exploration.

     A dense network with two linear outputs estimates the action
     values. Transitions are kept in a bounded memory and replayed in
     random batches after each training episode.
     """

     def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
         """Sets the hyperparameters and builds the model.

         symbol, feature: not used by this class
         n_features: input dimension of the model
         env: environment offering reset(), step() and action_space
         hu: number of units per hidden layer
         lr: learning rate of the optimizer
         """
         self.epsilon = 1.0  # initial exploration rate
         self.epsilon_decay = 0.9975  # factor applied per replay
         self.epsilon_min = 0.1  # no further decay at or below this
         self.memory = deque(maxlen=2000)  # bounded replay memory
         self.batch_size = 32
         self.gamma = 0.5  # discount factor
         self.trewards = list()  # total reward per finished episode
         self.max_treward = -np.inf
         self.n_features = n_features
         self.env = env
         self.episodes = 0
         self._create_model(hu, lr)

     def _create_model(self, hu, lr):
         """Builds and compiles the network with two hidden layers."""
         self.model = Sequential()
         self.model.add(Dense(hu, activation='relu',
                              input_dim=self.n_features))
         self.model.add(Dense(hu, activation='relu'))
         self.model.add(Dense(2, activation='linear'))
         self.model.compile(loss='mse', optimizer=opt(learning_rate=lr))

     def _reshape(self, state):
         """Flattens a state into an array of shape (1, n)."""
         state = state.flatten()
         return np.reshape(state, [1, len(state)])

     def act(self, state):
         """Returns a random action with probability epsilon, else the
         action with the highest predicted value."""
         if random.random() < self.epsilon:
             return self.env.action_space.sample()
         return np.argmax(self.model.predict(state)[0])

     def replay(self):
         """Fits the model on a random batch and decays epsilon.

         Non-terminal transitions use the reward plus the discounted
         maximum predicted value of the next state as target; terminal
         transitions use the reward alone.
         """
         batch = random.sample(self.memory, self.batch_size)
         for state, action, next_state, reward, done in batch:
             if not done:
                 reward += self.gamma * np.amax(
                     self.model.predict(next_state)[0])
             # fit on every sample so that terminal rewards are learned
             target = self.model.predict(state)
             target[0, action] = reward
             self.model.fit(state, target, epochs=1, verbose=False)
         if self.epsilon > self.epsilon_min:
             self.epsilon *= self.epsilon_decay

     def learn(self, episodes):
         """Trains the agent for the given number of episodes.

         An episode runs until the environment reports ``done`` (at
         most 4,999 steps); a replay follows each episode once the
         memory holds more than ``batch_size`` transitions.
         """
         for _ in range(1, episodes + 1):
             self.episodes += 1
             state, info = self.env.reset()
             state = self._reshape(state)
             treward = 0
             for f in range(1, 5000):
                 self.f = f
                 action = self.act(state)
                 next_state, reward, done, trunc, info = self.env.step(
                     action)
                 treward += reward
                 next_state = self._reshape(next_state)
                 self.memory.append(
                     [state, action, next_state, reward, done])
                 state = next_state
                 if done:
                     self.trewards.append(treward)
                     self.max_treward = max(self.max_treward, treward)
                     templ = f'episode={self.episodes:4d} | '
                     templ += f'treward={treward:7.3f}'
                     templ += f' | max={self.max_treward:7.3f}'
                     print(templ, end='\r')
                     break
             if len(self.memory) > self.batch_size:
                 self.replay()
         print()

     def test(self, episodes, min_accuracy=0.0,
              min_performance=0.0, verbose=True,
              full=True):
         """Runs greedy test episodes and reports the results.

         episodes: number of test episodes
         min_accuracy, min_performance: thresholds set on the
             environment during testing, if it has these attributes;
             the previous values are restored afterwards, also when an
             episode raises
         verbose: whether to print one result line per episode
         full: print every line (True) or overwrite in place (False)
         """
         has_ma = hasattr(self.env, 'min_accuracy')
         has_mp = hasattr(self.env, 'min_performance')
         if has_ma:
             ma = self.env.min_accuracy
             self.env.min_accuracy = min_accuracy
         if has_mp:
             mp = self.env.min_performance
             self.env.min_performance = min_performance
             self.performances = list()
         try:
             for _ in range(1, episodes + 1):
                 state, info = self.env.reset()
                 state = self._reshape(state)
                 for f in range(1, 5001):
                     action = np.argmax(self.model.predict(state)[0])
                     state, reward, done, trunc, info = self.env.step(
                         action)
                     state = self._reshape(state)
                     if done:
                         # f is the number of steps taken
                         templ = f'total reward={f:4d}'
                         if hasattr(self.env, 'accuracy'):
                             templ += ' | accuracy='
                             templ += f'{self.env.accuracy:.3f}'
                         if has_mp:
                             self.performances.append(
                                 self.env.performance)
                             templ += ' | performance='
                             templ += f'{self.env.performance:.3f}'
                         if verbose:
                             if full:
                                 print(templ)
                             else:
                                 print(templ, end='\r')
                         break
         finally:
             if has_ma:
                 self.env.min_accuracy = ma
             if has_mp:
                 self.env.min_performance = mp
         print()
	#
	# Deep Q-Learning Agent
	#
	# (c) Dr. Yves J. Hilpisch
	# Reinforcement Learning for Finance
	#

	import os
	import random
	import warnings
	import numpy as np
	import tensorflow as tf
	from tensorflow import keras
	from collections import deque
	from keras.layers import Dense, Flatten
	from keras.models import Sequential

	warnings.simplefilter('ignore')
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


	from tensorflow.python.framework.ops import disable_eager_execution
	disable_eager_execution()

	opt = keras.optimizers.legacy.Adam


	class DQLAgent:
	    """Deep Q-learning agent with epsilon-greedy exploration.

	    A dense network with two linear outputs estimates the action
	    values. Transitions are kept in a bounded memory and replayed in
	    random batches after each training episode.
	    """

	    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
	        """Sets the hyperparameters and builds the model.

	        symbol, feature: not used by this class
	        n_features: input dimension of the model
	        env: environment offering reset(), step() and action_space
	        hu: number of units per hidden layer
	        lr: learning rate of the optimizer
	        """
	        self.epsilon = 1.0  # initial exploration rate
	        self.epsilon_decay = 0.9975  # factor applied per replay
	        self.epsilon_min = 0.1  # no further decay at or below this
	        self.memory = deque(maxlen=2000)  # bounded replay memory
	        self.batch_size = 32
	        self.gamma = 0.5  # discount factor
	        self.trewards = list()  # total reward per finished episode
	        self.max_treward = -np.inf
	        self.n_features = n_features
	        self.env = env
	        self.episodes = 0
	        self._create_model(hu, lr)

	    def _create_model(self, hu, lr):
	        """Builds and compiles the network with two hidden layers."""
	        self.model = Sequential()
	        self.model.add(Dense(hu, activation='relu',
	                             input_dim=self.n_features))
	        self.model.add(Dense(hu, activation='relu'))
	        self.model.add(Dense(2, activation='linear'))
	        self.model.compile(loss='mse', optimizer=opt(learning_rate=lr))

	    def _reshape(self, state):
	        """Flattens a state into an array of shape (1, n)."""
	        state = state.flatten()
	        return np.reshape(state, [1, len(state)])

	    def act(self, state):
	        """Returns a random action with probability epsilon, else the
	        action with the highest predicted value."""
	        if random.random() < self.epsilon:
	            return self.env.action_space.sample()
	        return np.argmax(self.model.predict(state)[0])

	    def replay(self):
	        """Fits the model on a random batch and decays epsilon.

	        Non-terminal transitions use the reward plus the discounted
	        maximum predicted value of the next state as target; terminal
	        transitions use the reward alone.
	        """
	        batch = random.sample(self.memory, self.batch_size)
	        for state, action, next_state, reward, done in batch:
	            if not done:
	                reward += self.gamma * np.amax(
	                    self.model.predict(next_state)[0])
	            # fit on every sample so that terminal rewards are learned
	            target = self.model.predict(state)
	            target[0, action] = reward
	            self.model.fit(state, target, epochs=1, verbose=False)
	        if self.epsilon > self.epsilon_min:
	            self.epsilon *= self.epsilon_decay

	    def learn(self, episodes):
	        """Trains the agent for the given number of episodes.

	        An episode runs until the environment reports ``done`` (at
	        most 4,999 steps); a replay follows each episode once the
	        memory holds more than ``batch_size`` transitions.
	        """
	        for _ in range(1, episodes + 1):
	            self.episodes += 1
	            state, info = self.env.reset()
	            state = self._reshape(state)
	            treward = 0
	            for f in range(1, 5000):
	                self.f = f
	                action = self.act(state)
	                next_state, reward, done, trunc, info = self.env.step(
	                    action)
	                treward += reward
	                next_state = self._reshape(next_state)
	                self.memory.append(
	                    [state, action, next_state, reward, done])
	                state = next_state
	                if done:
	                    self.trewards.append(treward)
	                    self.max_treward = max(self.max_treward, treward)
	                    templ = f'episode={self.episodes:4d} | '
	                    templ += f'treward={treward:7.3f}'
	                    templ += f' | max={self.max_treward:7.3f}'
	                    print(templ, end='\r')
	                    break
	            if len(self.memory) > self.batch_size:
	                self.replay()
	        print()

	    def test(self, episodes, min_accuracy=0.0,
	             min_performance=0.0, verbose=True,
	             full=True):
	        """Runs greedy test episodes and reports the results.

	        episodes: number of test episodes
	        min_accuracy, min_performance: thresholds set on the
	            environment during testing, if it has these attributes;
	            the previous values are restored afterwards, also when an
	            episode raises
	        verbose: whether to print one result line per episode
	        full: print every line (True) or overwrite in place (False)
	        """
	        has_ma = hasattr(self.env, 'min_accuracy')
	        has_mp = hasattr(self.env, 'min_performance')
	        if has_ma:
	            ma = self.env.min_accuracy
	            self.env.min_accuracy = min_accuracy
	        if has_mp:
	            mp = self.env.min_performance
	            self.env.min_performance = min_performance
	            self.performances = list()
	        try:
	            for _ in range(1, episodes + 1):
	                state, info = self.env.reset()
	                state = self._reshape(state)
	                for f in range(1, 5001):
	                    action = np.argmax(self.model.predict(state)[0])
	                    state, reward, done, trunc, info = self.env.step(
	                        action)
	                    state = self._reshape(state)
	                    if done:
	                        # f is the number of steps taken
	                        templ = f'total reward={f:4d}'
	                        if hasattr(self.env, 'accuracy'):
	                            templ += ' | accuracy='
	                            templ += f'{self.env.accuracy:.3f}'
	                        if has_mp:
	                            self.performances.append(
	                                self.env.performance)
	                            templ += ' | performance='
	                            templ += f'{self.env.performance:.3f}'
	                        if verbose:
	                            if full:
	                                print(templ)
	                            else:
	                                print(templ, end='\r')
	                        break
	        finally:
	            if has_ma:
	                self.env.min_accuracy = ma
	            if has_mp:
	                self.env.min_performance = mp
	        print()