Reinforcement Learning for Finance

Workshop at ODSC London 2024

Dr. Yves J. Hilpisch | The Python Quants | CPF Program

London, September 6, 2024

(short link to this Gist: http://bit.ly/odsc_ldn_2024)

Slides

You can find the slides at:

http://certificate.tpq.io/odsc_ldn_2024.pdf

Book

You can find an early (pre-print) version of my new book at:

https://certificate.tpq.io/rlfinance.html

The book on O'Reilly:

https://learning.oreilly.com/library/view/reinforcement-learning-for/9781098169169/

Resources

This Gist contains selected resources used during the workshop.

Social Media

https://cpf.tpq.io
https://x.com/dyjh
https://linkedin.com/in/dyjh/
https://github.com/yhilpisch
https://youtube.com/c/yves-hilpisch
https://bit.ly/quants_dev

Disclaimer

All the content, Python code, Jupyter Notebooks, and other materials (the “Material”) come without warranties or representations, to the extent permitted by applicable law.

None of the Material represents any kind of recommendation or investment advice.

The Material is only meant as a technical illustration.

(c) Dr. Yves J. Hilpisch

{
"cells": [
{
"cell_type": "markdown",
"id": "475819a4-e148-4616-b1cb-44b659aeb08a",
"metadata": {},
"source": [
"<img src=\"https://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>"
]
},
{
"cell_type": "markdown",
"id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e",
"metadata": {},
"source": [
"# Reinforcement Learning for Finance\n",
"\n",
"**Chapter 07 &mdash; Dynamic Hedging**\n",
"\n",
"&copy; Dr. Yves J. Hilpisch\n",
"\n",
"<a href=\"https://tpq.io\" target=\"_blank\">https://tpq.io</a> | <a href=\"https://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
]
},
{
"cell_type": "markdown",
"id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e",
"metadata": {},
"source": [
"## Delta Hedging"
]
},
{
"cell_type": "raw",
"id": "bcc20fa7-c4ce-44b7-b3ce-080856f592f9",
"metadata": {},
"source": [
"# tag::01[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b74284e7-9506-4793-bc99-016775313b22",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import math\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy import stats\n",
"from pylab import plt, mpl"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e80ce705-6c55-46d9-9199-549d8ea689f8",
"metadata": {},
"outputs": [],
"source": [
"plt.style.use('seaborn-v0_8')\n",
"mpl.rcParams['figure.dpi'] = 300\n",
"mpl.rcParams['savefig.dpi'] = 300\n",
"mpl.rcParams['font.family'] = 'serif'\n",
"np.set_printoptions(suppress=True)"
]
},
{
"cell_type": "raw",
"id": "fd098209-20e3-4a88-8b1c-94e7c784229b",
"metadata": {},
"source": [
"# end::01[]"
]
},
{
"cell_type": "raw",
"id": "86610d76-e354-4db3-89c7-522a41872bce",
"metadata": {},
"source": [
"# tag::02[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cf2bc18-3029-4a69-baf7-21d1efcd54a4",
"metadata": {},
"outputs": [],
"source": [
"from bsm73 import bsm_call_value"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92fd8b53-2281-4332-9306-c8416e88d8b1",
"metadata": {},
"outputs": [],
"source": [
"S0 = 100 # <1>\n",
"K = 100 # <2>\n",
"T = 1. # <3>\n",
"t = 0. # <4>\n",
"r = 0.05 # <5>\n",
"sigma = 0.2 # <6>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57fd3991-aeb8-408d-aba8-e704fb68e97b",
"metadata": {},
"outputs": [],
"source": [
"bsm_call_value(S0, K, T, t, r, sigma)"
]
},
{
"cell_type": "raw",
"id": "b2c6bacd-4fb4-4b10-8a3f-e752d6c285d5",
"metadata": {},
"source": [
"# end::02[]"
]
},
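{
"cell_type": "markdown",
"id": "added-note-bsm73-sketch",
"metadata": {},
"source": [
"*Note:* The helper module `bsm73` is not included in this Gist. The following cell is a minimal sketch, assuming that `bsm_call_value` implements the standard Black-Scholes-Merton (1973) European call formula; the name `bsm_call_value_sketch` is hypothetical and only meant for illustration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-code-bsm73-sketch",
"metadata": {},
"outputs": [],
"source": [
"def bsm_call_value_sketch(St, K, T, t, r, sigma):\n",
"    ''' Sketch of a BSM (1973) European call value\n",
"        (assumption, not the original bsm73 implementation). '''\n",
"    d1 = ((math.log(St / K) + (r + 0.5 * sigma ** 2) * (T - t)) /\n",
"          (sigma * math.sqrt(T - t)))\n",
"    d2 = d1 - sigma * math.sqrt(T - t)\n",
"    return (St * stats.norm.cdf(d1) -\n",
"            math.exp(-r * (T - t)) * K * stats.norm.cdf(d2))\n",
"\n",
"bsm_call_value_sketch(S0, K, T, t, r, sigma)  # should be close to 10.45"
]
},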
{
"cell_type": "raw",
"id": "78487203-1f09-4c58-bd91-46592c9b32d9",
"metadata": {},
"source": [
"# tag::03[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8aa7775b-3842-414f-b6c3-3e1f7dd4b176",
"metadata": {},
"outputs": [],
"source": [
"random.seed(1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c255e96e-11b2-4e66-a990-372a59b1f418",
"metadata": {},
"outputs": [],
"source": [
"def simulate_gbm(S0, T, r, sigma, steps=100):\n",
" gbm = [S0]\n",
" dt = T / steps\n",
" for t in range(1, steps + 1):\n",
" st = gbm[-1] * math.exp((r - sigma ** 2 / 2) * dt\n",
" + sigma * math.sqrt(dt) * random.gauss(0, 1))\n",
" gbm.append(st)\n",
" return gbm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00c6d682-1f6e-4e53-93ff-a8db7b3d6626",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"gbm = simulate_gbm(S0, T, r, sigma)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e95452-a460-4cb4-945b-f40a4293d055",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"plt.plot(gbm, lw=1.0, c='b')\n",
"plt.xlabel('time step')\n",
"plt.ylabel('stock price');\n",
"# plt.savefig('../figures/figure_07_01.png');"
]
},
{
"cell_type": "raw",
"id": "795c69a4-bb68-4567-99e7-96fd98d91ab9",
"metadata": {},
"source": [
"# end::03[]"
]
},
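{
"cell_type": "markdown",
"id": "added-note-gbm-discretization",
"metadata": {},
"source": [
"The `simulate_gbm()` function above draws from the exact risk-neutral solution of geometric Brownian motion,\n",
"\n",
"$$S_{t+\\Delta t} = S_t \\exp\\left(\\left(r - \\frac{\\sigma^2}{2}\\right)\\Delta t + \\sigma \\sqrt{\\Delta t}\\, z\\right), \\quad z \\sim N(0, 1),$$\n",
"\n",
"so the simulated path is free of discretization bias at the grid points."
]
},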
{
"cell_type": "raw",
"id": "4143ebf1-744c-417c-88f8-87cfb02f9741",
"metadata": {},
"source": [
"# tag::04[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a749b90-e603-488a-830a-fa71e4df6891",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def bsm_delta(St, K, T, t, r, sigma):\n",
" d1 = ((math.log(St / K) + (r + 0.5 * sigma ** 2) * (T - t)) /\n",
" (sigma * math.sqrt(T - t)))\n",
" return stats.norm.cdf(d1, 0, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3075a0b-5a3b-4a6e-8602-5c790fd8875c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"S_ = range(40, 181, 4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80f18d97-4f6b-46a7-9b6d-a237b4b8dc11",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"d = [bsm_delta(s, K, T, 0, r, sigma) for s in S_]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21e59d07-88de-4a5d-882a-a3626db0135d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"plt.plot(S_, d, lw=1.0, c='b')\n",
"plt.xlabel('stock price')\n",
"plt.ylabel('delta');\n",
"# plt.savefig('../figures/figure_07_02.png');"
]
},
{
"cell_type": "raw",
"id": "db5aef56-32e7-498a-9e5a-8f8e13f8f26f",
"metadata": {},
"source": [
"# end::04[]"
]
},
{
"cell_type": "raw",
"id": "f38c9bed-c508-4203-a2f5-f3e06b08bd93",
"metadata": {},
"source": [
"# tag::05[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee3aeefe-0fbb-40bc-b71a-a3f8b30a092a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"dt = T / (len(gbm) - 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc4bcce9-9d70-4105-b116-4d00622d5947",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"bond = [math.exp(r * i * dt) for i in range(len(gbm))]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df109760-a22c-4ef0-8f52-b641c4b60d62",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def option_replication():\n",
" res = pd.DataFrame()\n",
" for i in range(len(gbm) - 1):\n",
" C = bsm_call_value(gbm[i], K, T, i * dt, r, sigma)\n",
" if i == 0:\n",
" s = bsm_delta(gbm[i], K, T, i * dt, r, sigma) # <1>\n",
" b = (C - s * gbm[i]) / bond[i] # <2>\n",
" else:\n",
" V = s * gbm[i] + b * bond[i] # <3>\n",
" s = bsm_delta(gbm[i], K, T, i * dt, r, sigma) # <4>\n",
" b = (C - s * gbm[i]) / bond[i] # <5>\n",
" df = pd.DataFrame({'St': gbm[i], 'C': C, 'V': V,\n",
" 's': s, 'b': b}, index=[0]) # <6>\n",
" res = pd.concat((res, df), ignore_index=True) # <6>\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b75706b-0254-4334-9a76-033386d96108",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res = option_replication()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f91730af-4da5-4111-a511-26d5a2f14d0e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res[['C', 'V']].plot(style=['b', 'r--'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_03.png');"
]
},
{
"cell_type": "raw",
"id": "31cc1b28-8948-4ea8-8981-97e1721ddbc1",
"metadata": {},
"source": [
"# end::05[]"
]
},
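{
"cell_type": "markdown",
"id": "added-note-replication",
"metadata": {},
"source": [
"In `option_replication()`, the hedge portfolio is rebalanced at every step so that its value matches the option value: the stock position is set to the BSM delta, $s_t = \\Delta_t$, and the bond position follows from $s_t S_t + b_t B_t = C_t$, i.e. $b_t = (C_t - s_t S_t) / B_t$. The tracking error $V_t - C_t$ analyzed below stems from rebalancing only at discrete points in time."
]
},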
{
"cell_type": "raw",
"id": "e34475ee-b198-4d06-a171-705bc69701c4",
"metadata": {},
"source": [
"# tag::06[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1e661ef-0660-4191-999b-7442072fdf8c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"(res['V'] - res['C']).mean() # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "697c07af-406f-497c-8348-508c7a1d6389",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"((res['V'] - res['C']) ** 2).mean() # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d430c959-eec8-489b-ac68-f2d2b096bcd2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"(res['V'] - res['C']).hist(bins=35, color='b')\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_04.png');"
]
},
{
"cell_type": "raw",
"id": "8ff5f226-1b99-4afd-9ec1-d597d9fe69d4",
"metadata": {},
"source": [
"# end::06[]"
]
},
{
"cell_type": "markdown",
"id": "36861754-df58-40cf-a690-512291df5731",
"metadata": {},
"source": [
"## Hedging Environment"
]
},
{
"cell_type": "raw",
"id": "1cd6d772-fd28-4c8d-b616-55251a8db30d",
"metadata": {},
"source": [
"# tag::07[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0585829d-c4a2-494f-a5b9-ba3a0c6d5b1c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class observation_space:\n",
" def __init__(self, n):\n",
" self.shape = (n,)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "754cfe0d-8fff-4d2d-b96a-d6c611de6226",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class action_space:\n",
" def __init__(self, n):\n",
" self.n = n\n",
" def seed(self, seed):\n",
" random.seed(seed)\n",
" def sample(self):\n",
" return random.random() # <1>"
]
},
{
"cell_type": "raw",
"id": "9ab9f6f6-bfa6-4736-8eea-819f4ffce652",
"metadata": {},
"source": [
"# end::07[]"
]
},
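{
"cell_type": "markdown",
"id": "added-note-action-space",
"metadata": {},
"source": [
"The two helper classes above only mimic the minimal Gym-style interface that the DQL agent expects. As a quick illustration (added cell, not part of the original workshop code), sampling the action space yields a random float in $[0, 1)$ that is later interpreted as the stock (delta) position:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-code-action-space-demo",
"metadata": {},
"outputs": [],
"source": [
"demo_space = action_space(1)  # hypothetical demo instance\n",
"demo_space.seed(100)\n",
"[round(demo_space.sample(), 4) for _ in range(3)]"
]
},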
{
"cell_type": "raw",
"id": "d834537a-d04f-4772-a53d-f14e921f9d39",
"metadata": {},
"source": [
"# tag::08[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "622556ea-c7fc-4418-862e-c0403506f175",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging:\n",
" def __init__(self, S0, K_, T, r_, sigma_, steps):\n",
" self.initial_value = S0\n",
" self.strike_ = K_ # <1>\n",
" self.maturity = T\n",
" self.short_rate_ = r_ # <1>\n",
" self.volatility_ = sigma_ # <1>\n",
" self.steps = steps\n",
" self.observation_space = observation_space(5)\n",
" self.osn = self.observation_space.shape[0]\n",
" self.action_space = action_space(1)\n",
" self._simulate_data()\n",
" self.portfolios = pd.DataFrame()\n",
" self.episode = 0"
]
},
{
"cell_type": "raw",
"id": "80cc319c-6eb6-4115-b352-8c392570286f",
"metadata": {},
"source": [
"# end::08[]"
]
},
{
"cell_type": "raw",
"id": "f9a4c607-f5d0-4fd4-95f5-fa17ccd2fe53",
"metadata": {},
"source": [
"# tag::09[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bae8dc41-21a9-46e6-abc4-5b18a83a2839",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def _simulate_data(self):\n",
" s = [self.initial_value]\n",
" self.strike = random.choice(self.strike_) # <1>\n",
" self.short_rate = random.choice(self.short_rate_) # <1>\n",
" self.volatility = random.choice(self.volatility_) # <1>\n",
" self.dt = self.maturity / self.steps\n",
" for t in range(1, self.steps + 1):\n",
" st = s[t - 1] * math.exp(\n",
" ((self.short_rate - self.volatility ** 2 / 2) * self.dt +\n",
" self.volatility * math.sqrt(self.dt) *\n",
" random.gauss(0, 1))) # <2>\n",
" s.append(st)\n",
" self.data = pd.DataFrame(s, columns=['index'])\n",
" self.data['bond'] = np.exp(self.short_rate *\n",
" np.arange(len(self.data)) * self.dt)"
]
},
{
"cell_type": "raw",
"id": "fecb868b-073a-4d44-870b-b7223fb159d6",
"metadata": {},
"source": [
"# end::09[]"
]
},
{
"cell_type": "raw",
"id": "39e024df-2ef8-4880-af81-b6b3faf2dbf0",
"metadata": {},
"source": [
"# tag::10[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "303c0a10-95af-42e1-8f97-0c9ecc91d4d7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def _get_state(self):\n",
" St = self.data['index'].iloc[self.bar]\n",
" Bt = self.data['bond'].iloc[self.bar]\n",
" ttm = self.maturity - self.bar * self.dt\n",
" if ttm > 0:\n",
" Ct = bsm_call_value(St, self.strike,\n",
" self.maturity, self.bar * self.dt,\n",
" self.short_rate, self.volatility)\n",
" else:\n",
" Ct = max(St - self.strike, 0)\n",
" return np.array([St, Bt, ttm, Ct, self.strike, self.short_rate,\n",
" self.stock, self.bond]), {} \n",
" def seed(self, seed=None):\n",
" if seed is not None:\n",
" random.seed(seed)\n",
" def reset(self):\n",
" self.bar = 0\n",
" self.bond = 0\n",
" self.stock = 0\n",
" self.treward = 0\n",
" self.episode += 1\n",
" self._simulate_data()\n",
" self.state, _ = self._get_state()\n",
" return self.state, _"
]
},
{
"cell_type": "raw",
"id": "761d2793-e808-487e-9e9c-1c0c551a7a90",
"metadata": {},
"source": [
"# end::10[]"
]
},
{
"cell_type": "raw",
"id": "3856a1db-3153-4479-9474-41d7cc53e0ec",
"metadata": {},
"source": [
"# tag::11[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ceb2abe-a786-4fcf-a570-24e7099cfae8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def step(self, action):\n",
" if self.bar == 0: # <1>\n",
" reward = 0\n",
" self.bar += 1\n",
" self.stock = float(action) # <2>\n",
" self.bond = ((self.state[3] - self.stock * self.state[0]) /\n",
" self.state[1]) # <3>\n",
" self.new_state, _ = self._get_state()\n",
" else:\n",
" self.bar += 1\n",
" self.new_state, _ = self._get_state()\n",
" phi_value = (self.stock * self.new_state[0] +\n",
" self.bond * self.new_state[1]) # <4>\n",
" pl = phi_value - self.new_state[3] # <5>\n",
" df = pd.DataFrame({'e': self.episode, 's': self.stock,\n",
" 'b': self.bond, 'phi': phi_value,\n",
" 'C': self.new_state[3], 'p&l[$]': pl,\n",
" 'p&l[%]': pl / max(self.new_state[3],\n",
" 1e-4) * 100,\n",
" 'St': self.new_state[0],\n",
" 'Bt': self.new_state[1],\n",
" 'K': self.strike, 'r': self.short_rate,\n",
" 'sigma': self.volatility},\n",
" index=[0]) # <6>\n",
" self.portfolios = pd.concat((self.portfolios, df),\n",
" ignore_index=True) # <6>\n",
" reward = -(phi_value - self.new_state[3]) ** 2 # <7>\n",
" self.stock = float(action) # <2>\n",
" self.bond = ((self.new_state[3] -\n",
" self.stock * self.new_state[0]) /\n",
" self.new_state[1]) # <3>\n",
" if self.bar == len(self.data) - 1: # <8>\n",
" done = True\n",
" else:\n",
" done = False\n",
" self.state = self.new_state\n",
" return self.state, float(reward), done, False, {}"
]
},
{
"cell_type": "raw",
"id": "ec195e72-d855-415c-9505-6b0ec1cff8a5",
"metadata": {},
"source": [
"# end::11[]"
]
},
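{
"cell_type": "markdown",
"id": "added-note-hedging-reward",
"metadata": {},
"source": [
"The `step()` method rewards the agent with the negative squared hedging error, $r_t = -(\\Phi_t - C_t)^2$, where $\\Phi_t = s_{t-1} S_t + b_{t-1} B_t$ is the value of the previously chosen replication portfolio at the current prices and $C_t$ is the current option value. Maximizing the total reward therefore means minimizing the accumulated squared P&L of the hedge."
]
},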
{
"cell_type": "raw",
"id": "b2ff2a12-17f4-44ed-a641-1e93ea5af96c",
"metadata": {},
"source": [
"# tag::12[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35fc17a3-cdac-4fff-960b-b2a5e900bb2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"S0 = 100."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2729df4e-7f42-48f3-b3af-7fb2b6b53545",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging = Hedging(S0=S0,\n",
" K_=np.array([0.9, 0.95, 1., 1.05, 1.10]) * S0,\n",
" T=1.0, r_=[0, 0.01, 0.05],\n",
" sigma_=[0.1, 0.15, 0.2], steps=2 * 252) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "130fa362-6876-4749-88fa-077160c3de51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.seed(750)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80e7a078-5f57-44e3-876c-1709d64e4ce4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging._simulate_data()\n",
"(hedging.data / hedging.data.iloc[0]).plot(\n",
" lw=1.0, style=['r--', 'b-.'])\n",
"plt.xlabel('time step')\n",
"plt.ylabel('price');\n",
"# plt.savefig('../figures/figure_07_05.png');"
]
},
{
"cell_type": "raw",
"id": "5818dd3c-2aeb-4096-bd47-06644ff1374a",
"metadata": {},
"source": [
"# end::12[]"
]
},
{
"cell_type": "raw",
"id": "cfe6d2a2-2c9e-40c9-b28a-985c9c203be7",
"metadata": {},
"source": [
"# tag::13[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3325e79-9775-45e5-9796-cea322034eea",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.reset()\n",
"for _ in range(hedging.steps - 1):\n",
" hedging.step(hedging.action_space.sample())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2cf19e9-f371-44f6-9a1e-ccbc126f44f7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios.head().round(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3af8aa96-6d43-4234-83ca-0e456a1cf356",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios[['C', 'phi']].plot(\n",
" style=['r--', 'b-'], lw=1, alpha=0.7)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_06.png');"
]
},
{
"cell_type": "raw",
"id": "44166346-6bb7-4cef-800d-f6a66c30d651",
"metadata": {},
"source": [
"# end::13[]"
]
},
{
"cell_type": "raw",
"id": "455a9240-96fe-4c12-8ea3-0612006529d7",
"metadata": {},
"source": [
"# tag::14[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ad77e9f-54ef-4518-b97b-ef573e777a37",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios['p&l[$]'].apply(abs).sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6a61e91-45c7-435b-a198-82c3b68e5db9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios['p&l[$]'].hist(bins=35, color='b')\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_07.png');"
]
},
{
"cell_type": "raw",
"id": "47519f1c-8369-465c-9943-eec35ede2843",
"metadata": {},
"source": [
"# end::14[]"
]
},
{
"cell_type": "raw",
"id": "a52562b8-176f-4207-bf6a-cf381d77fa0f",
"metadata": {},
"source": [
"# tag::15[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81052058-331d-41af-99f6-803514a933fc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from dqlagent import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83e1990b-3c0c-41fa-8208-b2a77f90ab51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"random.seed(100)\n",
"tf.random.set_seed(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "753f5091-d0bb-4f47-b6ce-e16bca9ebdb2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"opt = keras.optimizers.legacy.Adam"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bd8fb5d-39f5-4e83-9118-c78846e545a0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(DQLAgent):\n",
" def _create_model(self, hu, lr):\n",
" self.model = Sequential()\n",
" self.model.add(Dense(hu, input_dim=self.n_features,\n",
" activation='relu'))\n",
" self.model.add(Dense(hu, activation='relu'))\n",
" self.model.add(Dense(1, activation='linear')) # <1>\n",
" self.model.compile(loss='mse',\n",
" optimizer=opt(learning_rate=lr))"
]
},
{
"cell_type": "raw",
"id": "8a2edc83-3bfa-4154-815f-7d5bb81c60a9",
"metadata": {},
"source": [
"# end::15[]"
]
},
{
"cell_type": "raw",
"id": "e72ea993-84b6-4757-a281-820c260e78c5",
"metadata": {},
"source": [
"# tag::16[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47f55cb0-9513-4a20-aad6-7cba3fc0be7a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from scipy.optimize import minimize"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6085e91b-cc2e-4c99-8d05-205892fb0272",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def opt_action(self, state):\n",
" bnds = [(0, 1)] # <1>\n",
" def f(state, x): # <2>\n",
" s = state.copy()\n",
" s[0, 6] = x # <3>\n",
" s[0, 7] = ((s[0, 3] - x * s[0, 0]) / s[0, 1]) # <4>\n",
" return self.model.predict(s)[0, 0] # <5>\n",
" try:\n",
" action = minimize(lambda x: -f(state, x), 0.5,\n",
" bounds=bnds, method='Powell',\n",
" )['x'][0] # <6>\n",
" except:\n",
" action = self.env.stock\n",
" return action\n",
" \n",
" def act(self, state):\n",
" if random.random() <= self.epsilon:\n",
" return self.env.action_space.sample()\n",
" action = self.opt_action(state) # <7>\n",
" return action"
]
},
{
"cell_type": "raw",
"id": "146d1691-62c0-42c3-a5f1-3281e4472c2b",
"metadata": {},
"source": [
"# end::16[]"
]
},
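{
"cell_type": "markdown",
"id": "added-note-opt-action",
"metadata": {},
"source": [
"Because the hedge position is a continuous action, `opt_action()` replaces the usual `argmax` over a discrete action set with a numerical search: it chooses $a^* = \\arg \\max_{a \\in [0, 1]} \\hat{Q}(s(a))$, where the candidate action $a$ is written into the stock slot of the state, the bond position is adjusted accordingly, and $\\hat{Q}$ is the network's single-output value estimate. `scipy.optimize.minimize` with the Powell method performs this bounded search."
]
},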
{
"cell_type": "raw",
"id": "5e9d40af-9c1c-4213-88b6-3988a75d25c3",
"metadata": {},
"source": [
"# tag::17[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f14d3677-9499-4c71-973e-3b9008028dcc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def replay(self):\n",
" batch = random.sample(self.memory, self.batch_size)\n",
" for state, action, next_state, reward, done in batch:\n",
" target = reward\n",
" if not done:\n",
" ns = next_state.copy()\n",
" action = self.opt_action(ns) # <1>\n",
" ns[0, 6] = action # <2>\n",
" ns[0, 7] = ((ns[0, 3] -\n",
" action * ns[0, 0]) / ns[0, 1]) # <3>\n",
" target += (self.gamma *\n",
" self.model.predict(ns)[0, 0]) # <4>\n",
" self.model.fit(state, np.array([target]), epochs=1,\n",
" verbose=False)\n",
" if self.epsilon > self.epsilon_min:\n",
" self.epsilon *= self.epsilon_decay"
]
},
{
"cell_type": "raw",
"id": "bcb34fb7-bcdc-465b-a26c-082f4a1364ff",
"metadata": {},
"source": [
"# end::17[]"
]
},
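{
"cell_type": "markdown",
"id": "added-note-replay-target",
"metadata": {},
"source": [
"The `replay()` method uses the standard Q-learning target adapted to the continuous action: $y = r + \\gamma \\hat{Q}(s', a^*)$ for non-terminal transitions, with $a^*$ obtained from `opt_action()` applied to the next state, and $y = r$ otherwise. The network is then fit on this single target value."
]
},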
{
"cell_type": "raw",
"id": "6593422d-214c-4ccf-9145-e92013e44a96",
"metadata": {},
"source": [
"# tag::18[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5c90b10-fa10-41c4-ad62-555ea76b7c63",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def test(self, episodes, verbose=True):\n",
" for e in range(1, episodes + 1):\n",
" state, _ = self.env.reset()\n",
" state = self._reshape(state)\n",
" treward = 0\n",
" for _ in range(1, len(self.env.data) + 1):\n",
" action = self.opt_action(state)\n",
" state, reward, done, trunc, _ = self.env.step(action)\n",
" state = self._reshape(state)\n",
" treward += reward\n",
" if done:\n",
" templ = f'total penalty={treward:4.2f}'\n",
" if verbose:\n",
" print(templ)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4bea541-1657-4264-9690-d98b02be7c2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"random.seed(100)\n",
"np.random.seed(100)\n",
"tf.random.set_seed(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f442546f-b5da-4081-ab10-69c62132ae4d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent = HedgingAgent('SYM', feature=None, n_features=8,\n",
" env=hedging, hu=128, lr=0.0001)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "196cd97b-abe1-4944-b63f-7da29f97c11b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"episodes = 250"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "118377b4-cb91-4b33-b7e7-2851b549d48d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time hedgingagent.learn(episodes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5da9d2b-70e0-4178-afc6-13c677766fef",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.epsilon"
]
},
{
"cell_type": "raw",
"id": "ac829a56-0416-4b96-b57a-f5309a1e972f",
"metadata": {},
"source": [
"# end::18[]"
]
},
{
"cell_type": "raw",
"id": "a1583359-3523-4692-bb48-4aff076d399e",
"metadata": {},
"source": [
"# tag::19[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8d4652b-9e71-4a0a-b38b-26181f53a4c8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time hedgingagent.test(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "151903f4-d5e7-4f9b-afc9-c737d2809b71",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"n = max(hedgingagent.env.portfolios['e']) # <1>\n",
"n -= 1 # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61319424-d973-47bb-9009-e47824335315",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n]['p&l[$]'].describe() # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c3ceebb-3277-422d-9b86-96eaac230ba6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"p = hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n].iloc[0][\n",
" ['K', 'r', 'sigma']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22dd24cd-4334-4aa1-9c40-f4fdcfa452bf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"title = f\"CALL | K={p['K']:.1f} | r={p['r']} | sigma={p['sigma']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90ff0fe0-7707-4b0c-a50b-820909f98fca",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n][\n",
" ['phi', 'C', 'St']].iloc[:100].plot(\n",
" secondary_y='St', title=title, style=['r-', 'b--', 'g:'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_08.png');"
]
},
{
"cell_type": "raw",
"id": "c20a7950-967f-4a71-81d1-3bc4a3c80ddf",
"metadata": {},
"source": [
"# end::19[]"
]
},
{
"cell_type": "raw",
"id": "9030a2f1-acb4-4562-8cec-1b57a540a10c",
"metadata": {},
"source": [
"# tag::20[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc30fee1-fee9-4b0c-b67d-c24efc05f817",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n]['p&l[$]'].hist(\n",
" bins=35, color='blue')\n",
"plt.title(title)\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_09.png');"
]
},
{
"cell_type": "raw",
"id": "d4c3bfeb-a6b3-44e5-ae50-8e3b8b13b81a",
"metadata": {},
"source": [
"# end::20[]"
]
},
{
"cell_type": "markdown",
"id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55",
"metadata": {},
"source": [
"<img src=\"https://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>\n",
"\n",
"<a href=\"https://tpq.io\" target=\"_blank\">https://tpq.io</a> | <a href=\"https://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
#
# Deep Q-Learning Agent
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#
import os
import random
import warnings
import numpy as np
import tensorflow as tf
from tensorflow import keras
from collections import deque
from keras.layers import Dense, Flatten
from keras.models import Sequential
warnings.simplefilter('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
opt = keras.optimizers.legacy.Adam
class DQLAgent:
    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_decay = 0.9975
        self.epsilon_min = 0.1
        self.memory = deque(maxlen=2000)  # replay buffer
        self.batch_size = 32
        self.gamma = 0.5  # discount factor
        self.trewards = list()
        self.max_treward = -np.inf
        self.n_features = n_features
        self.env = env
        self.episodes = 0
        self._create_model(hu, lr)

    def _create_model(self, hu, lr):
        ''' DNN that approximates the action-value function. '''
        self.model = Sequential()
        self.model.add(Dense(hu, activation='relu',
                             input_dim=self.n_features))
        self.model.add(Dense(hu, activation='relu'))
        self.model.add(Dense(2, activation='linear'))
        self.model.compile(loss='mse', optimizer=opt(learning_rate=lr))

    def _reshape(self, state):
        ''' Reshapes a state to shape (1, n_features). '''
        state = state.flatten()
        return np.reshape(state, [1, len(state)])

    def act(self, state):
        ''' Epsilon-greedy action selection. '''
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def replay(self):
        ''' Retrains the DNN on a random batch of memorized transitions. '''
        batch = random.sample(self.memory, self.batch_size)
        for state, action, next_state, reward, done in batch:
            if not done:
                reward += self.gamma * np.amax(
                    self.model.predict(next_state)[0])
            target = self.model.predict(state)
            target[0, action] = reward
            self.model.fit(state, target, epochs=1, verbose=False)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def learn(self, episodes):
        ''' Trains the agent for a given number of episodes. '''
        for e in range(1, episodes + 1):
            self.episodes += 1
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for f in range(1, 5000):
                self.f = f
                action = self.act(state)
                next_state, reward, done, trunc, _ = self.env.step(action)
                treward += reward
                next_state = self._reshape(next_state)
                self.memory.append(
                    [state, action, next_state, reward, done])
                state = next_state
                if done:
                    self.trewards.append(treward)
                    self.max_treward = max(self.max_treward, treward)
                    templ = f'episode={self.episodes:4d} | '
                    templ += f'treward={treward:7.3f}'
                    templ += f' | max={self.max_treward:7.3f}'
                    print(templ, end='\r')
                    break
            if len(self.memory) > self.batch_size:
                self.replay()
        print()

    def test(self, episodes, min_accuracy=0.0,
             min_performance=0.0, verbose=True,
             full=True):
        ''' Tests the trained agent (for prediction/trading environments). '''
        ma = self.env.min_accuracy
        self.env.min_accuracy = min_accuracy
        if hasattr(self.env, 'min_performance'):
            mp = self.env.min_performance
            self.env.min_performance = min_performance
            self.performances = list()
        for e in range(1, episodes + 1):
            state, _ = self.env.reset()
            state = self._reshape(state)
            for f in range(1, 5001):
                action = np.argmax(self.model.predict(state)[0])
                state, reward, done, trunc, _ = self.env.step(action)
                state = self._reshape(state)
                if done:
                    templ = f'total reward={f:4d} | '
                    templ += f'accuracy={self.env.accuracy:.3f}'
                    if hasattr(self.env, 'min_performance'):
                        self.performances.append(self.env.performance)
                        templ += f' | performance={self.env.performance:.3f}'
                    if verbose:
                        if full:
                            print(templ)
                        else:
                            print(templ, end='\r')
                    break
        self.env.min_accuracy = ma
        if hasattr(self.env, 'min_performance'):
            self.env.min_performance = mp
        print()