Reinforcement Learning for Finance

Workshop at ODSC London 2024

Dr. Yves J. Hilpisch | The Python Quants | CPF Program

London, September 6, 2024

(short link to this Gist: http://bit.ly/odsc_ldn_2024)

Slides

You can find the slides at:

http://certificate.tpq.io/odsc_ldn_2024.pdf

Book

You can find an early (pre-print) version of my new book at:

https://certificate.tpq.io/rlfinance.html

The book on O'Reilly:

https://learning.oreilly.com/library/view/reinforcement-learning-for/9781098169169/

Resources

This Gist contains selected resources used during the workshop.

Social Media

https://cpf.tpq.io
https://x.com/dyjh
https://linkedin.com/in/dyjh/
https://github.com/yhilpisch
https://youtube.com/c/yves-hilpisch
https://bit.ly/quants_dev

Disclaimer

All the content, Python code, Jupyter Notebooks, and other materials (the “Material”) come without warranties or representations, to the extent permitted by applicable law.

None of the Material represents any kind of recommendation or investment advice.

The Material is only meant as a technical illustration.

(c) Dr. Yves J. Hilpisch

{
"cells": [
{
"cell_type": "markdown",
"id": "475819a4-e148-4616-b1cb-44b659aeb08a",
"metadata": {},
"source": [
"<img src=\"https://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>"
]
},
{
"cell_type": "markdown",
"id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e",
"metadata": {},
"source": [
"# Reinforcement Learning for Finance\n",
"\n",
"**Chapter 07 &mdash; Dynamic Hedging**\n",
"\n",
"&copy; Dr. Yves J. Hilpisch\n",
"\n",
"<a href=\"https://tpq.io\" target=\"_blank\">https://tpq.io</a> | <a href=\"https://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
]
},
{
"cell_type": "markdown",
"id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e",
"metadata": {},
"source": [
"## Delta Hedging"
]
},
{
"cell_type": "raw",
"id": "bcc20fa7-c4ce-44b7-b3ce-080856f592f9",
"metadata": {},
"source": [
"# tag::01[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b74284e7-9506-4793-bc99-016775313b22",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import math\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy import stats\n",
"from pylab import plt, mpl"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e80ce705-6c55-46d9-9199-549d8ea689f8",
"metadata": {},
"outputs": [],
"source": [
"plt.style.use('seaborn-v0_8')\n",
"mpl.rcParams['figure.dpi'] = 300\n",
"mpl.rcParams['savefig.dpi'] = 300\n",
"mpl.rcParams['font.family'] = 'serif'\n",
"np.set_printoptions(suppress=True)"
]
},
{
"cell_type": "raw",
"id": "fd098209-20e3-4a88-8b1c-94e7c784229b",
"metadata": {},
"source": [
"# end::01[]"
]
},
{
"cell_type": "raw",
"id": "86610d76-e354-4db3-89c7-522a41872bce",
"metadata": {},
"source": [
"# tag::02[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cf2bc18-3029-4a69-baf7-21d1efcd54a4",
"metadata": {},
"outputs": [],
"source": [
"from bsm73 import bsm_call_value"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92fd8b53-2281-4332-9306-c8416e88d8b1",
"metadata": {},
"outputs": [],
"source": [
"S0 = 100 # <1>\n",
"K = 100 # <2>\n",
"T = 1. # <3>\n",
"t = 0. # <4>\n",
"r = 0.05 # <5>\n",
"sigma = 0.2 # <6>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57fd3991-aeb8-408d-aba8-e704fb68e97b",
"metadata": {},
"outputs": [],
"source": [
"bsm_call_value(S0, K, T, t, r, sigma)"
]
},
{
"cell_type": "raw",
"id": "b2c6bacd-4fb4-4b10-8a3f-e752d6c285d5",
"metadata": {},
"source": [
"# end::02[]"
]
},
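{
"cell_type": "markdown",
"id": "added-note-bsm73-sketch",
"metadata": {},
"source": [
"*Note:* The helper module `bsm73` is not included in this Gist. The following cell is a minimal sketch, assuming that `bsm_call_value` implements the standard Black-Scholes-Merton (1973) European call formula; the name `bsm_call_value_sketch` is hypothetical and only meant for illustration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-code-bsm73-sketch",
"metadata": {},
"outputs": [],
"source": [
"def bsm_call_value_sketch(St, K, T, t, r, sigma):\n",
"    ''' Sketch of a BSM (1973) European call value\n",
"        (assumption, not the original bsm73 implementation). '''\n",
"    d1 = ((math.log(St / K) + (r + 0.5 * sigma ** 2) * (T - t)) /\n",
"          (sigma * math.sqrt(T - t)))\n",
"    d2 = d1 - sigma * math.sqrt(T - t)\n",
"    return (St * stats.norm.cdf(d1) -\n",
"            math.exp(-r * (T - t)) * K * stats.norm.cdf(d2))\n",
"\n",
"bsm_call_value_sketch(S0, K, T, t, r, sigma)  # should be close to 10.45"
]
},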
{
"cell_type": "raw",
"id": "78487203-1f09-4c58-bd91-46592c9b32d9",
"metadata": {},
"source": [
"# tag::03[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8aa7775b-3842-414f-b6c3-3e1f7dd4b176",
"metadata": {},
"outputs": [],
"source": [
"random.seed(1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c255e96e-11b2-4e66-a990-372a59b1f418",
"metadata": {},
"outputs": [],
"source": [
"def simulate_gbm(S0, T, r, sigma, steps=100):\n",
" gbm = [S0]\n",
" dt = T / steps\n",
" for t in range(1, steps + 1):\n",
" st = gbm[-1] * math.exp((r - sigma ** 2 / 2) * dt\n",
" + sigma * math.sqrt(dt) * random.gauss(0, 1))\n",
" gbm.append(st)\n",
" return gbm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00c6d682-1f6e-4e53-93ff-a8db7b3d6626",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"gbm = simulate_gbm(S0, T, r, sigma)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e95452-a460-4cb4-945b-f40a4293d055",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"plt.plot(gbm, lw=1.0, c='b')\n",
"plt.xlabel('time step')\n",
"plt.ylabel('stock price');\n",
"# plt.savefig('../figures/figure_07_01.png');"
]
},
{
"cell_type": "raw",
"id": "795c69a4-bb68-4567-99e7-96fd98d91ab9",
"metadata": {},
"source": [
"# end::03[]"
]
},
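{
"cell_type": "markdown",
"id": "added-note-gbm-discretization",
"metadata": {},
"source": [
"The `simulate_gbm()` function above draws from the exact risk-neutral solution of geometric Brownian motion,\n",
"\n",
"$$S_{t+\\Delta t} = S_t \\exp\\left(\\left(r - \\frac{\\sigma^2}{2}\\right)\\Delta t + \\sigma \\sqrt{\\Delta t}\\, z\\right), \\quad z \\sim N(0, 1),$$\n",
"\n",
"so the simulated path is free of discretization bias at the grid points."
]
},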
{
"cell_type": "raw",
"id": "4143ebf1-744c-417c-88f8-87cfb02f9741",
"metadata": {},
"source": [
"# tag::04[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a749b90-e603-488a-830a-fa71e4df6891",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def bsm_delta(St, K, T, t, r, sigma):\n",
" d1 = ((math.log(St / K) + (r + 0.5 * sigma ** 2) * (T - t)) /\n",
" (sigma * math.sqrt(T - t)))\n",
" return stats.norm.cdf(d1, 0, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3075a0b-5a3b-4a6e-8602-5c790fd8875c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"S_ = range(40, 181, 4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80f18d97-4f6b-46a7-9b6d-a237b4b8dc11",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"d = [bsm_delta(s, K, T, 0, r, sigma) for s in S_]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21e59d07-88de-4a5d-882a-a3626db0135d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"plt.plot(S_, d, lw=1.0, c='b')\n",
"plt.xlabel('stock price')\n",
"plt.ylabel('delta');\n",
"# plt.savefig('../figures/figure_07_02.png');"
]
},
{
"cell_type": "raw",
"id": "db5aef56-32e7-498a-9e5a-8f8e13f8f26f",
"metadata": {},
"source": [
"# end::04[]"
]
},
{
"cell_type": "raw",
"id": "f38c9bed-c508-4203-a2f5-f3e06b08bd93",
"metadata": {},
"source": [
"# tag::05[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee3aeefe-0fbb-40bc-b71a-a3f8b30a092a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"dt = T / (len(gbm) - 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc4bcce9-9d70-4105-b116-4d00622d5947",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"bond = [math.exp(r * i * dt) for i in range(len(gbm))]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df109760-a22c-4ef0-8f52-b641c4b60d62",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def option_replication():\n",
" res = pd.DataFrame()\n",
" for i in range(len(gbm) - 1):\n",
" C = bsm_call_value(gbm[i], K, T, i * dt, r, sigma)\n",
" if i == 0:\n",
" s = bsm_delta(gbm[i], K, T, i * dt, r, sigma) # <1>\n",
" b = (C - s * gbm[i]) / bond[i] # <2>\n",
" else:\n",
" V = s * gbm[i] + b * bond[i] # <3>\n",
" s = bsm_delta(gbm[i], K, T, i * dt, r, sigma) # <4>\n",
" b = (C - s * gbm[i]) / bond[i] # <5>\n",
" df = pd.DataFrame({'St': gbm[i], 'C': C, 'V': V,\n",
" 's': s, 'b': b}, index=[0]) # <6>\n",
" res = pd.concat((res, df), ignore_index=True) # <6>\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b75706b-0254-4334-9a76-033386d96108",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res = option_replication()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f91730af-4da5-4111-a511-26d5a2f14d0e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res[['C', 'V']].plot(style=['b', 'r--'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_03.png');"
]
},
{
"cell_type": "raw",
"id": "31cc1b28-8948-4ea8-8981-97e1721ddbc1",
"metadata": {},
"source": [
"# end::05[]"
]
},
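{
"cell_type": "markdown",
"id": "added-note-replication",
"metadata": {},
"source": [
"In `option_replication()`, the hedge portfolio is rebalanced at every step so that its value matches the option value: the stock position is set to the BSM delta, $s_t = \\Delta_t$, and the bond position follows from $s_t S_t + b_t B_t = C_t$, i.e. $b_t = (C_t - s_t S_t) / B_t$. The tracking error $V_t - C_t$ analyzed below stems from rebalancing only at discrete points in time."
]
},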
{
"cell_type": "raw",
"id": "e34475ee-b198-4d06-a171-705bc69701c4",
"metadata": {},
"source": [
"# tag::06[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1e661ef-0660-4191-999b-7442072fdf8c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"(res['V'] - res['C']).mean() # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "697c07af-406f-497c-8348-508c7a1d6389",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"((res['V'] - res['C']) ** 2).mean() # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d430c959-eec8-489b-ac68-f2d2b096bcd2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"(res['V'] - res['C']).hist(bins=35, color='b')\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_04.png');"
]
},
{
"cell_type": "raw",
"id": "8ff5f226-1b99-4afd-9ec1-d597d9fe69d4",
"metadata": {},
"source": [
"# end::06[]"
]
},
{
"cell_type": "markdown",
"id": "36861754-df58-40cf-a690-512291df5731",
"metadata": {},
"source": [
"## Hedging Environment"
]
},
{
"cell_type": "raw",
"id": "1cd6d772-fd28-4c8d-b616-55251a8db30d",
"metadata": {},
"source": [
"# tag::07[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0585829d-c4a2-494f-a5b9-ba3a0c6d5b1c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class observation_space:\n",
" def __init__(self, n):\n",
" self.shape = (n,)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "754cfe0d-8fff-4d2d-b96a-d6c611de6226",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class action_space:\n",
" def __init__(self, n):\n",
" self.n = n\n",
" def seed(self, seed):\n",
" random.seed(seed)\n",
" def sample(self):\n",
" return random.random() # <1>"
]
},
{
"cell_type": "raw",
"id": "9ab9f6f6-bfa6-4736-8eea-819f4ffce652",
"metadata": {},
"source": [
"# end::07[]"
]
},
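{
"cell_type": "markdown",
"id": "added-note-action-space",
"metadata": {},
"source": [
"The two helper classes above only mimic the minimal Gym-style interface that the DQL agent expects. As a quick illustration (added cell, not part of the original workshop code), sampling the action space yields a random float in $[0, 1)$ that is later interpreted as the stock (delta) position:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-code-action-space-demo",
"metadata": {},
"outputs": [],
"source": [
"demo_space = action_space(1)  # hypothetical demo instance\n",
"demo_space.seed(100)\n",
"[round(demo_space.sample(), 4) for _ in range(3)]"
]
},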
{
"cell_type": "raw",
"id": "d834537a-d04f-4772-a53d-f14e921f9d39",
"metadata": {},
"source": [
"# tag::08[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "622556ea-c7fc-4418-862e-c0403506f175",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging:\n",
" def __init__(self, S0, K_, T, r_, sigma_, steps):\n",
" self.initial_value = S0\n",
" self.strike_ = K_ # <1>\n",
" self.maturity = T\n",
" self.short_rate_ = r_ # <1>\n",
" self.volatility_ = sigma_ # <1>\n",
" self.steps = steps\n",
" self.observation_space = observation_space(5)\n",
" self.osn = self.observation_space.shape[0]\n",
" self.action_space = action_space(1)\n",
" self._simulate_data()\n",
" self.portfolios = pd.DataFrame()\n",
" self.episode = 0"
]
},
{
"cell_type": "raw",
"id": "80cc319c-6eb6-4115-b352-8c392570286f",
"metadata": {},
"source": [
"# end::08[]"
]
},
{
"cell_type": "raw",
"id": "f9a4c607-f5d0-4fd4-95f5-fa17ccd2fe53",
"metadata": {},
"source": [
"# tag::09[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bae8dc41-21a9-46e6-abc4-5b18a83a2839",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def _simulate_data(self):\n",
" s = [self.initial_value]\n",
" self.strike = random.choice(self.strike_) # <1>\n",
" self.short_rate = random.choice(self.short_rate_) # <1>\n",
" self.volatility = random.choice(self.volatility_) # <1>\n",
" self.dt = self.maturity / self.steps\n",
" for t in range(1, self.steps + 1):\n",
" st = s[t - 1] * math.exp(\n",
" ((self.short_rate - self.volatility ** 2 / 2) * self.dt +\n",
" self.volatility * math.sqrt(self.dt) *\n",
" random.gauss(0, 1))) # <2>\n",
" s.append(st)\n",
" self.data = pd.DataFrame(s, columns=['index'])\n",
" self.data['bond'] = np.exp(self.short_rate *\n",
" np.arange(len(self.data)) * self.dt)"
]
},
{
"cell_type": "raw",
"id": "fecb868b-073a-4d44-870b-b7223fb159d6",
"metadata": {},
"source": [
"# end::09[]"
]
},
{
"cell_type": "raw",
"id": "39e024df-2ef8-4880-af81-b6b3faf2dbf0",
"metadata": {},
"source": [
"# tag::10[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "303c0a10-95af-42e1-8f97-0c9ecc91d4d7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def _get_state(self):\n",
" St = self.data['index'].iloc[self.bar]\n",
" Bt = self.data['bond'].iloc[self.bar]\n",
" ttm = self.maturity - self.bar * self.dt\n",
" if ttm > 0:\n",
" Ct = bsm_call_value(St, self.strike,\n",
" self.maturity, self.bar * self.dt,\n",
" self.short_rate, self.volatility)\n",
" else:\n",
" Ct = max(St - self.strike, 0)\n",
" return np.array([St, Bt, ttm, Ct, self.strike, self.short_rate,\n",
" self.stock, self.bond]), {} \n",
" def seed(self, seed=None):\n",
" if seed is not None:\n",
" random.seed(seed)\n",
" def reset(self):\n",
" self.bar = 0\n",
" self.bond = 0\n",
" self.stock = 0\n",
" self.treward = 0\n",
" self.episode += 1\n",
" self._simulate_data()\n",
" self.state, _ = self._get_state()\n",
" return self.state, _"
]
},
{
"cell_type": "raw",
"id": "761d2793-e808-487e-9e9c-1c0c551a7a90",
"metadata": {},
"source": [
"# end::10[]"
]
},
{
"cell_type": "raw",
"id": "3856a1db-3153-4479-9474-41d7cc53e0ec",
"metadata": {},
"source": [
"# tag::11[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ceb2abe-a786-4fcf-a570-24e7099cfae8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Hedging(Hedging):\n",
" def step(self, action):\n",
" if self.bar == 0: # <1>\n",
" reward = 0\n",
" self.bar += 1\n",
" self.stock = float(action) # <2>\n",
" self.bond = ((self.state[3] - self.stock * self.state[0]) /\n",
" self.state[1]) # <3>\n",
" self.new_state, _ = self._get_state()\n",
" else:\n",
" self.bar += 1\n",
" self.new_state, _ = self._get_state()\n",
" phi_value = (self.stock * self.new_state[0] +\n",
" self.bond * self.new_state[1]) # <4>\n",
" pl = phi_value - self.new_state[3] # <5>\n",
" df = pd.DataFrame({'e': self.episode, 's': self.stock,\n",
" 'b': self.bond, 'phi': phi_value,\n",
" 'C': self.new_state[3], 'p&l[$]': pl,\n",
" 'p&l[%]': pl / max(self.new_state[3],\n",
" 1e-4) * 100,\n",
" 'St': self.new_state[0],\n",
" 'Bt': self.new_state[1],\n",
" 'K': self.strike, 'r': self.short_rate,\n",
" 'sigma': self.volatility},\n",
" index=[0]) # <6>\n",
" self.portfolios = pd.concat((self.portfolios, df),\n",
" ignore_index=True) # <6>\n",
" reward = -(phi_value - self.new_state[3]) ** 2 # <7>\n",
" self.stock = float(action) # <2>\n",
" self.bond = ((self.new_state[3] -\n",
" self.stock * self.new_state[0]) /\n",
" self.new_state[1]) # <3>\n",
" if self.bar == len(self.data) - 1: # <8>\n",
" done = True\n",
" else:\n",
" done = False\n",
" self.state = self.new_state\n",
" return self.state, float(reward), done, False, {}"
]
},
{
"cell_type": "raw",
"id": "ec195e72-d855-415c-9505-6b0ec1cff8a5",
"metadata": {},
"source": [
"# end::11[]"
]
},
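{
"cell_type": "markdown",
"id": "added-note-hedging-reward",
"metadata": {},
"source": [
"The `step()` method rewards the agent with the negative squared hedging error, $r_t = -(\\Phi_t - C_t)^2$, where $\\Phi_t = s_{t-1} S_t + b_{t-1} B_t$ is the value of the previously chosen replication portfolio at the current prices and $C_t$ is the current option value. Maximizing the total reward therefore means minimizing the accumulated squared P&L of the hedge."
]
},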
{
"cell_type": "raw",
"id": "b2ff2a12-17f4-44ed-a641-1e93ea5af96c",
"metadata": {},
"source": [
"# tag::12[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35fc17a3-cdac-4fff-960b-b2a5e900bb2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"S0 = 100."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2729df4e-7f42-48f3-b3af-7fb2b6b53545",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging = Hedging(S0=S0,\n",
" K_=np.array([0.9, 0.95, 1., 1.05, 1.10]) * S0,\n",
" T=1.0, r_=[0, 0.01, 0.05],\n",
" sigma_=[0.1, 0.15, 0.2], steps=2 * 252) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "130fa362-6876-4749-88fa-077160c3de51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.seed(750)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80e7a078-5f57-44e3-876c-1709d64e4ce4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging._simulate_data()\n",
"(hedging.data / hedging.data.iloc[0]).plot(\n",
" lw=1.0, style=['r--', 'b-.'])\n",
"plt.xlabel('time step')\n",
"plt.ylabel('price');\n",
"# plt.savefig('../figures/figure_07_05.png');"
]
},
{
"cell_type": "raw",
"id": "5818dd3c-2aeb-4096-bd47-06644ff1374a",
"metadata": {},
"source": [
"# end::12[]"
]
},
{
"cell_type": "raw",
"id": "cfe6d2a2-2c9e-40c9-b28a-985c9c203be7",
"metadata": {},
"source": [
"# tag::13[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3325e79-9775-45e5-9796-cea322034eea",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.reset()\n",
"for _ in range(hedging.steps - 1):\n",
" hedging.step(hedging.action_space.sample())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2cf19e9-f371-44f6-9a1e-ccbc126f44f7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios.head().round(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3af8aa96-6d43-4234-83ca-0e456a1cf356",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios[['C', 'phi']].plot(\n",
" style=['r--', 'b-'], lw=1, alpha=0.7)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_06.png');"
]
},
{
"cell_type": "raw",
"id": "44166346-6bb7-4cef-800d-f6a66c30d651",
"metadata": {},
"source": [
"# end::13[]"
]
},
{
"cell_type": "raw",
"id": "455a9240-96fe-4c12-8ea3-0612006529d7",
"metadata": {},
"source": [
"# tag::14[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ad77e9f-54ef-4518-b97b-ef573e777a37",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios['p&l[$]'].apply(abs).sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6a61e91-45c7-435b-a198-82c3b68e5db9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedging.portfolios['p&l[$]'].hist(bins=35, color='b')\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_07.png');"
]
},
{
"cell_type": "raw",
"id": "47519f1c-8369-465c-9943-eec35ede2843",
"metadata": {},
"source": [
"# end::14[]"
]
},
{
"cell_type": "raw",
"id": "a52562b8-176f-4207-bf6a-cf381d77fa0f",
"metadata": {},
"source": [
"# tag::15[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81052058-331d-41af-99f6-803514a933fc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from dqlagent import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83e1990b-3c0c-41fa-8208-b2a77f90ab51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"random.seed(100)\n",
"tf.random.set_seed(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "753f5091-d0bb-4f47-b6ce-e16bca9ebdb2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"opt = keras.optimizers.legacy.Adam"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bd8fb5d-39f5-4e83-9118-c78846e545a0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(DQLAgent):\n",
" def _create_model(self, hu, lr):\n",
" self.model = Sequential()\n",
" self.model.add(Dense(hu, input_dim=self.n_features,\n",
" activation='relu'))\n",
" self.model.add(Dense(hu, activation='relu'))\n",
" self.model.add(Dense(1, activation='linear')) # <1>\n",
" self.model.compile(loss='mse',\n",
" optimizer=opt(learning_rate=lr))"
]
},
{
"cell_type": "raw",
"id": "8a2edc83-3bfa-4154-815f-7d5bb81c60a9",
"metadata": {},
"source": [
"# end::15[]"
]
},
{
"cell_type": "raw",
"id": "e72ea993-84b6-4757-a281-820c260e78c5",
"metadata": {},
"source": [
"# tag::16[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47f55cb0-9513-4a20-aad6-7cba3fc0be7a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from scipy.optimize import minimize"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6085e91b-cc2e-4c99-8d05-205892fb0272",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def opt_action(self, state):\n",
" bnds = [(0, 1)] # <1>\n",
" def f(state, x): # <2>\n",
" s = state.copy()\n",
" s[0, 6] = x # <3>\n",
" s[0, 7] = ((s[0, 3] - x * s[0, 0]) / s[0, 1]) # <4>\n",
" return self.model.predict(s)[0, 0] # <5>\n",
" try:\n",
" action = minimize(lambda x: -f(state, x), 0.5,\n",
" bounds=bnds, method='Powell',\n",
" )['x'][0] # <6>\n",
" except:\n",
" action = self.env.stock\n",
" return action\n",
" \n",
" def act(self, state):\n",
" if random.random() <= self.epsilon:\n",
" return self.env.action_space.sample()\n",
" action = self.opt_action(state) # <7>\n",
" return action"
]
},
{
"cell_type": "raw",
"id": "146d1691-62c0-42c3-a5f1-3281e4472c2b",
"metadata": {},
"source": [
"# end::16[]"
]
},
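{
"cell_type": "markdown",
"id": "added-note-opt-action",
"metadata": {},
"source": [
"Because the hedge position is a continuous action, `opt_action()` replaces the usual `argmax` over a discrete action set with a numerical search: it chooses $a^* = \\arg \\max_{a \\in [0, 1]} \\hat{Q}(s(a))$, where the candidate action $a$ is written into the stock slot of the state, the bond position is adjusted accordingly, and $\\hat{Q}$ is the network's single-output value estimate. `scipy.optimize.minimize` with the Powell method performs this bounded search."
]
},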
{
"cell_type": "raw",
"id": "5e9d40af-9c1c-4213-88b6-3988a75d25c3",
"metadata": {},
"source": [
"# tag::17[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f14d3677-9499-4c71-973e-3b9008028dcc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def replay(self):\n",
" batch = random.sample(self.memory, self.batch_size)\n",
" for state, action, next_state, reward, done in batch:\n",
" target = reward\n",
" if not done:\n",
" ns = next_state.copy()\n",
" action = self.opt_action(ns) # <1>\n",
" ns[0, 6] = action # <2>\n",
" ns[0, 7] = ((ns[0, 3] -\n",
" action * ns[0, 0]) / ns[0, 1]) # <3>\n",
" target += (self.gamma *\n",
" self.model.predict(ns)[0, 0]) # <4>\n",
" self.model.fit(state, np.array([target]), epochs=1,\n",
" verbose=False)\n",
" if self.epsilon > self.epsilon_min:\n",
" self.epsilon *= self.epsilon_decay"
]
},
{
"cell_type": "raw",
"id": "bcb34fb7-bcdc-465b-a26c-082f4a1364ff",
"metadata": {},
"source": [
"# end::17[]"
]
},
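{
"cell_type": "markdown",
"id": "added-note-replay-target",
"metadata": {},
"source": [
"The `replay()` method uses the standard Q-learning target adapted to the continuous action: $y = r + \\gamma \\hat{Q}(s', a^*)$ for non-terminal transitions, with $a^*$ obtained from `opt_action()` applied to the next state, and $y = r$ otherwise. The network is then fit on this single target value."
]
},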
{
"cell_type": "raw",
"id": "6593422d-214c-4ccf-9145-e92013e44a96",
"metadata": {},
"source": [
"# tag::18[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5c90b10-fa10-41c4-ad62-555ea76b7c63",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class HedgingAgent(HedgingAgent):\n",
" def test(self, episodes, verbose=True):\n",
" for e in range(1, episodes + 1):\n",
" state, _ = self.env.reset()\n",
" state = self._reshape(state)\n",
" treward = 0\n",
" for _ in range(1, len(self.env.data) + 1):\n",
" action = self.opt_action(state)\n",
" state, reward, done, trunc, _ = self.env.step(action)\n",
" state = self._reshape(state)\n",
" treward += reward\n",
" if done:\n",
" templ = f'total penalty={treward:4.2f}'\n",
" if verbose:\n",
" print(templ)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4bea541-1657-4264-9690-d98b02be7c2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"random.seed(100)\n",
"np.random.seed(100)\n",
"tf.random.set_seed(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f442546f-b5da-4081-ab10-69c62132ae4d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent = HedgingAgent('SYM', feature=None, n_features=8,\n",
" env=hedging, hu=128, lr=0.0001)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "196cd97b-abe1-4944-b63f-7da29f97c11b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"episodes = 250"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "118377b4-cb91-4b33-b7e7-2851b549d48d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time hedgingagent.learn(episodes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5da9d2b-70e0-4178-afc6-13c677766fef",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.epsilon"
]
},
{
"cell_type": "raw",
"id": "ac829a56-0416-4b96-b57a-f5309a1e972f",
"metadata": {},
"source": [
"# end::18[]"
]
},
{
"cell_type": "raw",
"id": "a1583359-3523-4692-bb48-4aff076d399e",
"metadata": {},
"source": [
"# tag::19[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8d4652b-9e71-4a0a-b38b-26181f53a4c8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time hedgingagent.test(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "151903f4-d5e7-4f9b-afc9-c737d2809b71",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"n = max(hedgingagent.env.portfolios['e']) # <1>\n",
"n -= 1 # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61319424-d973-47bb-9009-e47824335315",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n]['p&l[$]'].describe() # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c3ceebb-3277-422d-9b86-96eaac230ba6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"p = hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n].iloc[0][\n",
" ['K', 'r', 'sigma']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22dd24cd-4334-4aa1-9c40-f4fdcfa452bf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"title = f\"CALL | K={p['K']:.1f} | r={p['r']} | sigma={p['sigma']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90ff0fe0-7707-4b0c-a50b-820909f98fca",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n][\n",
" ['phi', 'C', 'St']].iloc[:100].plot(\n",
" secondary_y='St', title=title, style=['r-', 'b--', 'g:'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_07_08.png');"
]
},
{
"cell_type": "raw",
"id": "c20a7950-967f-4a71-81d1-3bc4a3c80ddf",
"metadata": {},
"source": [
"# end::19[]"
]
},
{
"cell_type": "raw",
"id": "9030a2f1-acb4-4562-8cec-1b57a540a10c",
"metadata": {},
"source": [
"# tag::20[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc30fee1-fee9-4b0c-b67d-c24efc05f817",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"hedgingagent.env.portfolios[\n",
" hedgingagent.env.portfolios['e'] == n]['p&l[$]'].hist(\n",
" bins=35, color='blue')\n",
"plt.title(title)\n",
"plt.xlabel('P&L')\n",
"plt.ylabel('frequency');\n",
"# plt.savefig('../figures/figure_07_09.png');"
]
},
{
"cell_type": "raw",
"id": "d4c3bfeb-a6b3-44e5-ae50-8e3b8b13b81a",
"metadata": {},
"source": [
"# end::20[]"
]
},
{
"cell_type": "markdown",
"id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55",
"metadata": {},
"source": [
"<img src=\"https://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>\n",
"\n",
"<a href=\"https://tpq.io\" target=\"_blank\">https://tpq.io</a> | <a href=\"https://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
#
# Deep Q-Learning Agent
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#
import os
import random
import warnings
import numpy as np
import tensorflow as tf
from tensorflow import keras
from collections import deque
from keras.layers import Dense, Flatten
from keras.models import Sequential
warnings.simplefilter('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
opt = keras.optimizers.legacy.Adam
class DQLAgent:
    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_decay = 0.9975
        self.epsilon_min = 0.1
        self.memory = deque(maxlen=2000)  # replay buffer
        self.batch_size = 32
        self.gamma = 0.5  # discount factor
        self.trewards = list()
        self.max_treward = -np.inf
        self.n_features = n_features
        self.env = env
        self.episodes = 0
        self._create_model(hu, lr)

    def _create_model(self, hu, lr):
        ''' DNN that approximates the action-value function. '''
        self.model = Sequential()
        self.model.add(Dense(hu, activation='relu',
                             input_dim=self.n_features))
        self.model.add(Dense(hu, activation='relu'))
        self.model.add(Dense(2, activation='linear'))
        self.model.compile(loss='mse', optimizer=opt(learning_rate=lr))

    def _reshape(self, state):
        ''' Reshapes a state to shape (1, n_features). '''
        state = state.flatten()
        return np.reshape(state, [1, len(state)])

    def act(self, state):
        ''' Epsilon-greedy action selection. '''
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def replay(self):
        ''' Retrains the DNN on a random batch of memorized transitions. '''
        batch = random.sample(self.memory, self.batch_size)
        for state, action, next_state, reward, done in batch:
            if not done:
                reward += self.gamma * np.amax(
                    self.model.predict(next_state)[0])
            target = self.model.predict(state)
            target[0, action] = reward
            self.model.fit(state, target, epochs=1, verbose=False)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def learn(self, episodes):
        ''' Trains the agent for a given number of episodes. '''
        for e in range(1, episodes + 1):
            self.episodes += 1
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for f in range(1, 5000):
                self.f = f
                action = self.act(state)
                next_state, reward, done, trunc, _ = self.env.step(action)
                treward += reward
                next_state = self._reshape(next_state)
                self.memory.append(
                    [state, action, next_state, reward, done])
                state = next_state
                if done:
                    self.trewards.append(treward)
                    self.max_treward = max(self.max_treward, treward)
                    templ = f'episode={self.episodes:4d} | '
                    templ += f'treward={treward:7.3f}'
                    templ += f' | max={self.max_treward:7.3f}'
                    print(templ, end='\r')
                    break
            if len(self.memory) > self.batch_size:
                self.replay()
        print()

    def test(self, episodes, min_accuracy=0.0,
             min_performance=0.0, verbose=True,
             full=True):
        ''' Tests the trained agent (for prediction/trading environments). '''
        ma = self.env.min_accuracy
        self.env.min_accuracy = min_accuracy
        if hasattr(self.env, 'min_performance'):
            mp = self.env.min_performance
            self.env.min_performance = min_performance
            self.performances = list()
        for e in range(1, episodes + 1):
            state, _ = self.env.reset()
            state = self._reshape(state)
            for f in range(1, 5001):
                action = np.argmax(self.model.predict(state)[0])
                state, reward, done, trunc, _ = self.env.step(action)
                state = self._reshape(state)
                if done:
                    templ = f'total reward={f:4d} | '
                    templ += f'accuracy={self.env.accuracy:.3f}'
                    if hasattr(self.env, 'min_performance'):
                        self.performances.append(self.env.performance)
                        templ += f' | performance={self.env.performance:.3f}'
                    if verbose:
                        if full:
                            print(templ)
                        else:
                            print(templ, end='\r')
                    break
        self.env.min_accuracy = ma
        if hasattr(self.env, 'min_performance'):
            self.env.min_performance = mp
        print()