Skip to content

Instantly share code, notes, and snippets.

@yhilpisch
Last active November 2, 2024 16:09
Show Gist options
  • Save yhilpisch/f49dd4abd264310ede2bc2e8abfb81c7 to your computer and use it in GitHub Desktop.
Save yhilpisch/f49dd4abd264310ede2bc2e8abfb81c7 to your computer and use it in GitHub Desktop.

Reinforcement Learning for Finance

Workshop at ODSC London 2024

Dr. Yves J. Hilpisch | The Python Quants | CPF Program

London, 06. September 2024

(short link to this Gist: http://bit.ly/odsc_ldn_2024)

Slides

You can find the slides at:

http://certificate.tpq.io/odsc_ldn_2024.pdf

Book

You can find an early (pre-print) version of my new book at:

https://certificate.tpq.io/rlfinance.html

The book on O'Reilly:

https://learning.oreilly.com/library/view/reinforcement-learning-for/9781098169169/

Resources

This Gist contains selected resources used during the workshop.

Social Media

https://cpf.tpq.io https://x.com/dyjh https://linkedin.com/in/dyjh/ https://github.com/yhilpisch https://youtube.com/c/yves-hilpisch https://bit.ly/quants_dev

Disclaimer

All the content, Python code, Jupyter Notebooks, and other materials (the “Material”) come without warranties or representations, to the extent permitted by applicable law.

None of the Material represents any kind of recommendation or investment advice.

The Material is only meant as a technical illustration.

(c) Dr. Yves J. Hilpisch

Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "475819a4-e148-4616-b1cb-44b659aeb08a",
"metadata": {},
"source": [
"<img src=\"http://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>"
]
},
{
"cell_type": "markdown",
"id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e",
"metadata": {},
"source": [
"# Reinforcement Learning for Finance\n",
"\n",
"**Chapter 08 &mdash; Asset Allocation**\n",
"\n",
"&copy; Dr. Yves J. Hilpisch\n",
"\n",
"<a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:[email protected]\">[email protected]</a>"
]
},
{
"cell_type": "markdown",
"id": "adcfb8e0-6497-4d2e-a261-6762373fd693",
"metadata": {},
"source": [
"## Capital Market Line"
]
},
{
"cell_type": "raw",
"id": "bcc20fa7-c4ce-44b7-b3ce-080856f592f9",
"metadata": {},
"source": [
"# tag::01[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b74284e7-9506-4793-bc99-016775313b22",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import math\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy import stats\n",
"from pylab import plt, mpl"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e80ce705-6c55-46d9-9199-549d8ea689f8",
"metadata": {},
"outputs": [],
"source": [
"plt.style.use('seaborn-v0_8')\n",
"mpl.rcParams['figure.dpi'] = 300\n",
"mpl.rcParams['savefig.dpi'] = 300\n",
"mpl.rcParams['font.family'] = 'serif'\n",
"np.set_printoptions(suppress=True)\n",
"pd.set_option('display.float_format', lambda x: '%.3f' % x)"
]
},
{
"cell_type": "raw",
"id": "ff01b860-4149-4169-b1c4-ea731c67afbf",
"metadata": {},
"source": [
"# end::01[]"
]
},
{
"cell_type": "raw",
"id": "fe5d336f-b40d-4151-a444-a8f6d550b588",
"metadata": {},
"source": [
"# tag::02[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee1c1641-9894-48dd-90f0-215c1d276c17",
"metadata": {},
"outputs": [],
"source": [
"r = 0.025 # <1>\n",
"beta = 0.2 # <2>\n",
"sigma = 0.375 # <3>\n",
"mu = r + beta * sigma # <4>\n",
"mu # <4>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7785eae1-d7cb-4ebd-a9b2-67f73703b667",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"vol = np.linspace(0, 0.5) # <5>\n",
"ret = r + beta * vol # <5>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdf3e816-6483-4694-8566-495bd7154511",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"fig, ax = plt.subplots()\n",
"plt.plot(vol, ret, 'b', label='capital market line (CML)')\n",
"plt.plot(0, r, 'g^', label='riskless asset')\n",
"plt.plot(sigma, mu, 'ro', label='market portfolio')\n",
"plt.xlabel('volatility/risk')\n",
"plt.ylabel('expected return')\n",
"ax.set_xticks((0, sigma))\n",
"ax.set_xticklabels((0, '$\\sigma$',))\n",
"ax.set_yticks((0, r, mu))\n",
"ax.set_yticklabels((0, '$r$', '$\\mu$'))\n",
"plt.ylim(0, 0.15)\n",
"plt.legend();\n",
"# plt.savefig('../figures/figure_08_01');"
]
},
{
"cell_type": "raw",
"id": "b2c6bacd-4fb4-4b10-8a3f-e752d6c285d5",
"metadata": {},
"source": [
"# end::02[]"
]
},
{
"cell_type": "markdown",
"id": "36861754-df58-40cf-a690-512291df5731",
"metadata": {},
"source": [
"## Investing Environment"
]
},
{
"cell_type": "raw",
"id": "1cd6d772-fd28-4c8d-b616-55251a8db30d",
"metadata": {},
"source": [
"# tag::03[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0585829d-c4a2-494f-a5b9-ba3a0c6d5b1c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class observation_space:\n",
" def __init__(self, n):\n",
" self.shape = (n,)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "754cfe0d-8fff-4d2d-b96a-d6c611de6226",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class action_space:\n",
" def __init__(self, n):\n",
" self.n = n\n",
" \n",
" def seed(self, seed):\n",
" random.seed(seed)\n",
" \n",
" def sample(self):\n",
" return random.random() # <1>"
]
},
{
"cell_type": "raw",
"id": "9ab9f6f6-bfa6-4736-8eea-819f4ffce652",
"metadata": {},
"source": [
"# end::03[]"
]
},
{
"cell_type": "raw",
"id": "d834537a-d04f-4772-a53d-f14e921f9d39",
"metadata": {},
"source": [
"# tag::04[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "622556ea-c7fc-4418-862e-c0403506f175",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Investing:\n",
" def __init__(self, S0, T, r_, mu_, sigma_, steps, amount):\n",
" self.initial_value = S0\n",
" self.maturity = T\n",
" self.short_rate_ = r_ # <1>\n",
" self.index_drift_ = mu_ # <1>\n",
" self.volatility_ = sigma_ # <1>\n",
" self.steps = steps\n",
" self.initial_balance = amount # <2>\n",
" self.portfolio_value = amount # <3>\n",
" self.portfolio_value_new = amount # <4>\n",
" self.observation_space = observation_space(4)\n",
" self.osn = self.observation_space.shape[0]\n",
" self.action_space = action_space(1)\n",
" self._generate_data()\n",
" self.portfolios = pd.DataFrame()\n",
" self.episode = 0"
]
},
{
"cell_type": "raw",
"id": "80cc319c-6eb6-4115-b352-8c392570286f",
"metadata": {},
"source": [
"# end::04[]"
]
},
{
"cell_type": "raw",
"id": "f9a4c607-f5d0-4fd4-95f5-fa17ccd2fe53",
"metadata": {},
"source": [
"# tag::05[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bae8dc41-21a9-46e6-abc4-5b18a83a2839",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Investing(Investing):\n",
" def _generate_data(self):\n",
" s = [self.initial_value]\n",
" self.short_rate = random.choice(self.short_rate_) # <1>\n",
" self.index_drift = random.choice(self.index_drift_) # <1>\n",
" self.volatility = random.choice(self.volatility_) # <1>\n",
" self.dt = self.maturity / self.steps\n",
" for t in range(1, self.steps + 1):\n",
" st = s[t - 1] * math.exp(\n",
" ((self.index_drift - self.volatility ** 2 / 2) * self.dt +\n",
" self.volatility * math.sqrt(self.dt) * random.gauss(0, 1))\n",
" ) # <2>\n",
" s.append(st)\n",
" self.data = pd.DataFrame(s, columns=['Xt'])\n",
" self.data['Yt'] = self.initial_value * np.exp(\n",
" self.short_rate * np.arange(len(self.data)) * self.dt) # <3>"
]
},
{
"cell_type": "raw",
"id": "fecb868b-073a-4d44-870b-b7223fb159d6",
"metadata": {},
"source": [
"# end::05[]"
]
},
{
"cell_type": "raw",
"id": "39e024df-2ef8-4880-af81-b6b3faf2dbf0",
"metadata": {},
"source": [
"# tag::06[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "303c0a10-95af-42e1-8f97-0c9ecc91d4d7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Investing(Investing):\n",
" def _get_state(self):\n",
" Xt = self.data['Xt'].iloc[self.bar]\n",
" Yt = self.data['Yt'].iloc[self.bar]\n",
" return np.array([Xt, Yt, self.xt, self.yt]), {}\n",
" \n",
" def seed(self, seed=None):\n",
" if seed is not None:\n",
" random.seed(seed)\n",
" \n",
" def reset(self):\n",
" self.bar = 0\n",
" self.xt = 0\n",
" self.yt = 0\n",
" self.treward = 0\n",
" self.portfolio_value = self.initial_balance\n",
" self.portfolio_value_new = self.initial_balance\n",
" self.episode += 1\n",
" self._generate_data()\n",
" self.state, _ = self._get_state()\n",
" return self.state, _"
]
},
{
"cell_type": "raw",
"id": "761d2793-e808-487e-9e9c-1c0c551a7a90",
"metadata": {},
"source": [
"# end::06[]"
]
},
{
"cell_type": "raw",
"id": "3856a1db-3153-4479-9474-41d7cc53e0ec",
"metadata": {},
"source": [
"# tag::07[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ceb2abe-a786-4fcf-a570-24e7099cfae8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class Investing(Investing):\n",
" def add_results(self, pl):\n",
" df = pd.DataFrame({'e': self.episode, 'xt': self.xt,\n",
" 'yt': self.yt, 'pv': self.portfolio_value,\n",
" 'pv_new': self.portfolio_value_new, 'p&l[$]': pl, \n",
" 'p&l[%]': pl / self.portfolio_value_new,\n",
" 'Xt': self.state[0], 'Yt': self.state[1],\n",
" 'Xt_new': self.new_state[0],\n",
" 'Yt_new': self.new_state[1],\n",
" 'r': self.short_rate, 'mu': self.index_drift,\n",
" 'sigma': self.volatility}, index=[0])\n",
" self.portfolios = pd.concat((self.portfolios, df),\n",
" ignore_index=True)\n",
" \n",
" def step(self, action):\n",
" self.bar += 1\n",
" self.new_state, _ = self._get_state()\n",
" if self.bar == 1: # <1>\n",
" self.xt = action # <2>\n",
" self.yt = (1 - action) # <3>\n",
" pl = 0.\n",
" reward = 0.\n",
" self.add_results(pl)\n",
" else:\n",
" self.portfolio_value_new = (\n",
" self.xt * self.portfolio_value *\n",
" self.new_state[0] / self.state[0] +\n",
" self.yt * self.portfolio_value *\n",
" self.new_state[1] / self.state[1]) # <4>\n",
" pl = self.portfolio_value_new - self.portfolio_value # <5>\n",
" self.xt = action # <6>\n",
" self.yt = (1 - action) # <7>\n",
" self.add_results(pl) # <8>\n",
" reward = pl # <9>\n",
" self.portfolio_value = self.portfolio_value_new # <10>\n",
" if self.bar == len(self.data) - 1:\n",
" done = True\n",
" else:\n",
" done = False\n",
" self.state = self.new_state\n",
" return self.state, reward, done, False, {}"
]
},
{
"cell_type": "raw",
"id": "ec195e72-d855-415c-9505-6b0ec1cff8a5",
"metadata": {},
"source": [
"# end::07[]"
]
},
{
"cell_type": "raw",
"id": "b2ff2a12-17f4-44ed-a641-1e93ea5af96c",
"metadata": {},
"source": [
"# tag::08[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35fc17a3-cdac-4fff-960b-b2a5e900bb2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"S0 = 1."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31dfa81a-3ff7-4d93-ad75-d60326a300eb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing = Investing(S0=S0, T=1.0, r_=[0.05], mu_=[0.3],\n",
" sigma_=[0.35], steps=252, amount=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "130fa362-6876-4749-88fa-077160c3de51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing.seed(750)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eceaf5ef-48f2-437f-99a6-1e8fcadbef6a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing._generate_data()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da77a7be-7ebc-4f00-8aa8-e07d4029a7a5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing.data.plot(style=['g--', 'b:'], lw=1.0)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('price');\n",
"# plt.savefig('../figures/figure_08_02.png');"
]
},
{
"cell_type": "raw",
"id": "5818dd3c-2aeb-4096-bd47-06644ff1374a",
"metadata": {},
"source": [
"# end::08[]"
]
},
{
"cell_type": "markdown",
"id": "f7a5bc13-9c60-4f35-93c6-ff4091375089",
"metadata": {},
"source": [
"## Investing Agent"
]
},
{
"cell_type": "raw",
"id": "cfe6d2a2-2c9e-40c9-b28a-985c9c203be7",
"metadata": {},
"source": [
"# tag::09[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3686b411-5f39-487c-91bb-bc794320e590",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing.reset()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3325e79-9775-45e5-9796-cea322034eea",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"for _ in range(investing.steps - 1):\n",
" investing.step(investing.action_space.sample())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2cf19e9-f371-44f6-9a1e-ccbc126f44f7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing.portfolios.head().round(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24ef9a67-2628-4d0e-a911-57702ba9a239",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing.portfolios[['Xt', 'Yt', 'pv']].plot(\n",
" title='PORTFOLIO VALUE | RANDOM AGENT',\n",
" style=['g--', 'b:', 'r-'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_08_03.png');"
]
},
{
"cell_type": "raw",
"id": "44166346-6bb7-4cef-800d-f6a66c30d651",
"metadata": {},
"source": [
"# end::09[]"
]
},
{
"cell_type": "raw",
"id": "a52562b8-176f-4207-bf6a-cf381d77fa0f",
"metadata": {},
"source": [
"# tag::10[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81052058-331d-41af-99f6-803514a933fc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from dqlagent import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "493701f2-b72d-45f0-87da-f2f1fb98130b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"opt = keras.optimizers.legacy.Adam"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bd8fb5d-39f5-4e83-9118-c78846e545a0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class InvestingAgent(DQLAgent):\n",
" def _create_model(self, hu, lr):\n",
" self.model = Sequential()\n",
" self.model.add(Dense(hu, input_dim=self.n_features,\n",
" activation='relu'))\n",
" self.model.add(Dense(hu, activation='relu'))\n",
" self.model.add(Dense(1, activation='linear')) # <1>\n",
" self.model.compile(loss='mse',\n",
" optimizer=opt(learning_rate=lr))"
]
},
{
"cell_type": "raw",
"id": "8a2edc83-3bfa-4154-815f-7d5bb81c60a9",
"metadata": {},
"source": [
"# end::10[]"
]
},
{
"cell_type": "raw",
"id": "e72ea993-84b6-4757-a281-820c260e78c5",
"metadata": {},
"source": [
"# tag::11[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47f55cb0-9513-4a20-aad6-7cba3fc0be7a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from scipy.optimize import minimize"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6085e91b-cc2e-4c99-8d05-205892fb0272",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class InvestingAgent(InvestingAgent):\n",
" def opt_action(self, state):\n",
" bnds = [(0, 1)] # <1>\n",
" def f(state, x): # <2>\n",
" s = state.copy()\n",
" s[0, self.xp] = x # <3>\n",
" s[0, self.yp] = 1 - x # <4>\n",
" return self.model.predict(s)[0, 0] # <5>\n",
" action = minimize(lambda x: -f(state, x), 0.5,\n",
" bounds=bnds, method='Nelder-Mead',\n",
" )['x'][0] # <6>\n",
" return action\n",
" \n",
" def act(self, state):\n",
" if random.random() <= self.epsilon:\n",
" return self.env.action_space.sample()\n",
" action = self.opt_action(state) # <7>\n",
" return action"
]
},
{
"cell_type": "raw",
"id": "146d1691-62c0-42c3-a5f1-3281e4472c2b",
"metadata": {},
"source": [
"# end::11[]"
]
},
{
"cell_type": "raw",
"id": "5e9d40af-9c1c-4213-88b6-3988a75d25c3",
"metadata": {},
"source": [
"# tag::12[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f14d3677-9499-4c71-973e-3b9008028dcc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class InvestingAgent(InvestingAgent):\n",
" def replay(self):\n",
" batch = random.sample(self.memory, self.batch_size)\n",
" for state, action, next_state, reward, done in batch:\n",
" ns = next_state.copy()\n",
" target = reward\n",
" if not done:\n",
" action = self.opt_action(ns) # <1>\n",
" ns[0, self.xp] = action # <2>\n",
" ns[0, self.yp] = 1 - action # <3>\n",
" target += (self.gamma *\n",
" self.model.predict(ns)[0, 0]) # <4>\n",
" self.model.fit(state, np.array([target]),\n",
" epochs=1, verbose=False)\n",
" if self.epsilon > self.epsilon_min:\n",
" self.epsilon *= self.epsilon_decay"
]
},
{
"cell_type": "raw",
"id": "bcb34fb7-bcdc-465b-a26c-082f4a1364ff",
"metadata": {},
"source": [
"# end::12[]"
]
},
{
"cell_type": "raw",
"id": "6593422d-214c-4ccf-9145-e92013e44a96",
"metadata": {},
"source": [
"# tag::13[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5c90b10-fa10-41c4-ad62-555ea76b7c63",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class InvestingAgent(InvestingAgent):\n",
" def test(self, episodes, verbose=True):\n",
" for e in range(1, episodes + 1):\n",
" state, _ = self.env.reset()\n",
" state = self._reshape(state)\n",
" treward = 0\n",
" for _ in range(1, len(self.env.data) + 1):\n",
" action = self.opt_action(state)\n",
" state, reward, done, trunc, _ = self.env.step(action)\n",
" state = self._reshape(state)\n",
" treward += reward\n",
" if done:\n",
" templ = f'episode={e} | '\n",
" templ += f'total reward={treward:4.2f}'\n",
" if verbose:\n",
" print(templ, end='\\r')\n",
" break"
]
},
{
"cell_type": "raw",
"id": "d769d9be-afcb-48fa-bc27-1c404e2fcdf5",
"metadata": {},
"source": [
"# end::13[]"
]
},
{
"cell_type": "raw",
"id": "99d3eef9-3054-4fd8-ae78-944a31702b78",
"metadata": {},
"source": [
"# tag::14[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8473e2f-f8e8-4a47-93e3-faf10c00ee9c",
"metadata": {},
"outputs": [],
"source": [
"def set_seeds(seed=500):\n",
" random.seed(seed)\n",
" np.random.seed(seed)\n",
" tf.random.set_seed(seed)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4bea541-1657-4264-9690-d98b02be7c2e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"set_seeds()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "309333d7-bdb2-4dbd-917b-5ac793213585",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"investing = Investing(S0=S0, T=1.0, r_=[0, 0.025, 0.05],\n",
" mu_=[0.05, 0.1, 0.15],\n",
" sigma_=[0.1, 0.2, 0.3], steps=252, amount=1) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f442546f-b5da-4081-ab10-69c62132ae4d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent = InvestingAgent('2FS', feature=None, n_features=4,\n",
" env=investing, hu=128, lr=0.00025)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb1c0f47-8ce8-4767-84fd-376ee6a5ab96",
"metadata": {},
"outputs": [],
"source": [
"agent.xp = 2 # <1>\n",
"agent.yp = 3 # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "196cd97b-abe1-4944-b63f-7da29f97c11b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"episodes = 64"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "118377b4-cb91-4b33-b7e7-2851b549d48d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time agent.learn(episodes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5da9d2b-70e0-4178-afc6-13c677766fef",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.epsilon"
]
},
{
"cell_type": "raw",
"id": "a57d45e2-1970-4a58-82f0-7aa56f92803c",
"metadata": {},
"source": [
"# end::14[]"
]
},
{
"cell_type": "raw",
"id": "c61f9a93-6fcf-48f3-9fd6-013a55cdf6a6",
"metadata": {},
"source": [
"# tag::15[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6230a337-c7ab-45f7-8507-3a8cb72dce9b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.env.portfolios = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8d4652b-9e71-4a0a-b38b-26181f53a4c8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time agent.test(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "151903f4-d5e7-4f9b-afc9-c737d2809b71",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"n = max(agent.env.portfolios['e']) # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb84d944-8933-4046-bca6-737f411393f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res = agent.env.portfolios[agent.env.portfolios['e'] == n]\n",
"res.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb3926e0-d82a-4624-b49b-f7324905020d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"p = res.iloc[0][['r', 'mu', 'sigma']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b78af9a-a0a9-454e-bbdf-ffde5435f1a2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"t = f\"r={p['r']} | mu={p['mu']} | sigma={p['sigma']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6bb1b0b-aded-4bd0-a787-18aeeca0b83f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"res[['Xt', 'Yt', 'pv']].plot(\n",
" title='PORTFOLIO VALUE | ' + t,\n",
" style=['g--', 'b:', 'r-'], lw=1)\n",
"plt.xlabel('time step')\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_08_04.png');"
]
},
{
"cell_type": "raw",
"id": "6ec789f9-02e6-49e1-90f1-45c877958652",
"metadata": {},
"source": [
"# end::15[]"
]
},
{
"cell_type": "raw",
"id": "a1583359-3523-4692-bb48-4aff076d399e",
"metadata": {},
"source": [
"# tag::16[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85ef4f89-1a10-4ecc-af5b-f4750d1559b9",
"metadata": {},
"outputs": [],
"source": [
"rets = res[['Xt', 'Yt', 'pv']].pct_change(\n",
" ).mean() / agent.env.dt # <1>\n",
"rets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99e927ca-cef3-4e0c-a9be-81a805f69936",
"metadata": {},
"outputs": [],
"source": [
"stds = res[['Xt', 'Yt', 'pv']].pct_change(\n",
" ).std() / math.sqrt(agent.env.dt) # <2>\n",
"stds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2dcaf29-b3e2-417d-b723-7509b8e8168e",
"metadata": {},
"outputs": [],
"source": [
"rets[['Xt', 'pv']] / stds[['Xt', 'pv']] # <3>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fecb6a4d-e6f0-4013-b0ee-390d48ee571a",
"metadata": {},
"outputs": [],
"source": [
"res['xt'].mean() # <4>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c997138c-f7a8-4072-ba53-076105dd2416",
"metadata": {},
"outputs": [],
"source": [
"res['xt'].std() # <5>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d53e260c-fdce-42d3-8058-f155cd418f54",
"metadata": {},
"outputs": [],
"source": [
"res['xt'].plot(title='RISKY ALLOCATION | ' + t,\n",
" lw=1.0, c='b')\n",
"plt.ylim(res['xt'].min() - 0.1, res['xt'].max() + 0.1)\n",
"plt.xlabel('time step');\n",
"# plt.savefig('../figures/figure_08_05.png');"
]
},
{
"cell_type": "raw",
"id": "196578c8-62ea-4def-84eb-b41bcd003b5d",
"metadata": {},
"source": [
"# end::16[]"
]
},
{
"cell_type": "raw",
"id": "8dd2957b-0d6d-4e40-bfd5-4a0f8318ee38",
"metadata": {},
"source": [
"# tag::17[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9237f945-6618-4314-b477-d34bc2389c5c",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios.groupby('mu')['xt'].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ff529ed-da46-4eb4-b97d-f62f6abb4f2b",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios.groupby('sigma')['xt'].describe()"
]
},
{
"cell_type": "raw",
"id": "5dab67cd-80df-423c-ba31-04a2123e0ad0",
"metadata": {},
"source": [
"# end::17[]"
]
},
{
"cell_type": "raw",
"id": "ee8ba3ec-f3fd-47da-9700-b4b49f25417d",
"metadata": {},
"source": [
"# tag::18[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8c12127-857d-4459-af42-388866b0caec",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios.groupby('mu')['pv_new'].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58bfed3a-46d3-4fa1-a753-90c8a143ff82",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios.groupby('sigma')['pv_new'].describe()"
]
},
{
"cell_type": "raw",
"id": "17afb6ce-5e0c-4bfa-851b-89891b6385ee",
"metadata": {},
"source": [
"# end::18[]"
]
},
{
"cell_type": "raw",
"id": "4bb17062-9655-49f2-8c01-9da1a3d30864",
"metadata": {},
"source": [
"# tag::19[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "179d5611-ad66-4bc8-96a7-81ef6c7e1443",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"n = max(agent.env.portfolios['e']) # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c57cc688-6511-40af-bd3d-a1cee4dc02b4",
"metadata": {},
"outputs": [],
"source": [
"res = agent.env.portfolios[agent.env.portfolios['e'] == n]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e965dcc4-82a9-4f1c-b19b-59dde5e3048a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"p = res.iloc[0][['r', 'mu', 'sigma']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "370451e0-7b09-4d8b-85c7-13b5e91f38c4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"t = f\"r={p['r']} | mu={p['mu']} | sigma={p['sigma']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2edc590-2370-4d68-9d9d-f7e28d0e3455",
"metadata": {},
"outputs": [],
"source": [
"ax = res[['Xt', 'Yt', 'pv', 'xt']].plot(\n",
" title='PORTFOLIO VALUE | ' + t,\n",
" style=['g--', 'b:', 'r-', 'm-.'], lw=1,\n",
" secondary_y='xt'\n",
")\n",
"# plt.savefig('../figures/figure_08_06.png');"
]
},
{
"cell_type": "raw",
"id": "6203fcfe-9a40-4832-b981-8f9945b2320b",
"metadata": {},
"source": [
"# end::19[]"
]
},
{
"cell_type": "raw",
"id": "703bb96c-a86c-47d7-9109-a3b7a9c138c7",
"metadata": {},
"source": [
"xxxxx"
]
},
{
"cell_type": "markdown",
"id": "6d304115-a4dd-4a5b-967e-2f4a3ddb07b6",
"metadata": {},
"source": [
"## Two Assets"
]
},
{
"cell_type": "raw",
"id": "1d283e02-311a-4fda-8faa-dbb078c9795a",
"metadata": {},
"source": [
"# tag::20[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc0b66c9-03fd-4ee5-be97-5259ec0985fa",
"metadata": {},
"outputs": [],
"source": [
"class Investing(Investing):\n",
" def __init__(self, asset_one='.SPX', asset_two='.VIX',\n",
" steps=252, amount=1):\n",
" self.asset_one = asset_one\n",
" self.asset_two = asset_two\n",
" self.steps = steps\n",
" self.initial_balance = amount\n",
" self.portfolio_value = amount\n",
" self.portfolio_value_new = amount\n",
" self.observation_space = observation_space(4)\n",
" self.osn = self.observation_space.shape[0]\n",
" self.action_space = action_space(1)\n",
" self.retrieved = False\n",
" self._generate_data()\n",
" self.portfolios = pd.DataFrame()\n",
" self.episode = 0\n",
" \n",
" def _generate_data(self):\n",
" if self.retrieved:\n",
" pass\n",
" else:\n",
" url = 'https://certificate.tpq.io/rl4finance.csv' # <1>\n",
" self.raw = pd.read_csv(url, index_col=0,\n",
" parse_dates=True).dropna() # <1>\n",
" self.retrieved = True\n",
" self.data = pd.DataFrame()\n",
" self.data['Xt'] = self.raw[self.asset_one]\n",
" self.data['Yt'] = self.raw[self.asset_two]\n",
" s = random.randint(self.steps, len(self.data)) # <2>\n",
" self.data = self.data.iloc[s-self.steps:s] # <3>\n",
" self.data = self.data / self.data.iloc[0] # <4>"
]
},
{
"cell_type": "raw",
"id": "ab919ff2-27b9-4f95-a414-4312b5ecb025",
"metadata": {},
"source": [
"# end::20[]"
]
},
{
"cell_type": "raw",
"id": "c43b6efb-7886-4cd8-8d22-161c9671d615",
"metadata": {},
"source": [
"# tag::21[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe69c425-e014-4590-bf66-4c9ec0ceba76",
"metadata": {},
"outputs": [],
"source": [
"class Investing(Investing): \n",
" def _get_state(self):\n",
" Xt = self.data['Xt'].iloc[self.bar]\n",
" Yt = self.data['Yt'].iloc[self.bar]\n",
" self.date = self.data.index[self.bar] # <1>\n",
" return np.array([Xt, Yt, Xt - Yt, self.xt, self.yt]), {} # <2>\n",
" \n",
" def add_results(self, pl):\n",
" df = pd.DataFrame({\n",
" 'e': self.episode, 'date': self.date, # <3>\n",
" 'xt': self.xt, 'yt': self.yt,\n",
" 'pv': self.portfolio_value,\n",
" 'pv_new': self.portfolio_value_new, 'p&l[$]': pl, \n",
" 'p&l[%]': pl / self.portfolio_value_new * 100,\n",
" 'Xt': self.state[0], 'Yt': self.state[1],\n",
" 'Xt_new': self.new_state[0],\n",
" 'Yt_new': self.new_state[1],\n",
" }, index=[0])\n",
" self.portfolios = pd.concat((self.portfolios, df),\n",
" ignore_index=True)"
]
},
{
"cell_type": "raw",
"id": "4faff16c-f4ff-4e80-af94-9507a8451365",
"metadata": {},
"source": [
"# end::21[]"
]
},
{
"cell_type": "raw",
"id": "ca81e3db-3315-47bf-9b59-377269c2c288",
"metadata": {},
"source": [
"# tag::22[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4bcbe3c-a50e-4649-a8f1-726ead4f4f1f",
"metadata": {},
"outputs": [],
"source": [
"class Investing(Investing):\n",
" def step(self, action):\n",
" self.bar += 1\n",
" self.new_state, info = self._get_state()\n",
" if self.bar == 1:\n",
" self.xt = action\n",
" self.yt = (1 - action)\n",
" pl = 0.\n",
" reward = 0.\n",
" self.add_results(pl)\n",
" else:\n",
" self.portfolio_value_new = (\n",
" self.xt * self.portfolio_value *\n",
" self.new_state[0] / self.state[0] +\n",
" self.yt * self.portfolio_value *\n",
" self.new_state[1] / self.state[1])\n",
" pl = self.portfolio_value_new - self.portfolio_value\n",
" pen = (self.xt - action) ** 2 # <1>\n",
" self.xt = action\n",
" self.yt = (1 - action)\n",
" self.add_results(pl)\n",
" ret = self.portfolios['p&l[%]'].iloc[-1] / 100 * 252 # <2>\n",
" vol = self.portfolios['p&l[%]'].rolling(\n",
" 20, min_periods=1).std().iloc[-1] * math.sqrt(252) # <3>\n",
" sharpe = ret / vol # <4>\n",
" reward = sharpe - pen # <5>\n",
" self.portfolio_value = self.portfolio_value_new\n",
" if self.bar == len(self.data) - 1:\n",
" done = True\n",
" else:\n",
" done = False\n",
" self.state = self.new_state\n",
" return self.state, reward, done, False, {}"
]
},
{
"cell_type": "raw",
"id": "169373d8-bf0a-45f0-8f72-986b3a323f7f",
"metadata": {},
"source": [
"# end::22[]"
]
},
{
"cell_type": "raw",
"id": "a7a56d26-6ed5-4ec8-b535-2df0440e0de6",
"metadata": {},
"source": [
"# tag::23[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcac62b5-c184-4a47-a4fc-fe7a1b551b6e",
"metadata": {},
"outputs": [],
"source": [
"days = 2 * 252"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5aee535b-ef6d-487e-87c2-ddbe1cfb9322",
"metadata": {},
"outputs": [],
"source": [
"investing = Investing(steps=days)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b485de0b-e13a-4037-abc7-6dfc154ad646",
"metadata": {},
"outputs": [],
"source": [
"investing.data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6553101-ad40-47f8-8100-5ab143a96fd3",
"metadata": {},
"outputs": [],
"source": [
"investing.data.corr() # <1>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "299564ea-e440-48d0-943e-e5d60a684c79",
"metadata": {},
"outputs": [],
"source": [
"investing.data.plot(secondary_y='Yt',\n",
" style=['b', 'g--'], lw=1);\n",
"# plt.savefig('../figures/figure_08_07.png');"
]
},
{
"cell_type": "raw",
"id": "bccadced-4841-47cc-a73e-adcfcd7f541b",
"metadata": {},
"source": [
"# end::23[]"
]
},
{
"cell_type": "raw",
"id": "8822ef35-364e-4525-9a6c-d368446f40e5",
"metadata": {},
"source": [
"# tag::24[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc0e6d20-82fc-4715-aa9b-2105ca649282",
"metadata": {},
"outputs": [],
"source": [
"set_seeds()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21030e1f-e322-46b3-8ef1-383e65b710a0",
"metadata": {},
"outputs": [],
"source": [
"investing = Investing(steps=days)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a81d98e2-c171-4960-a1df-73967a3b8d03",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent = InvestingAgent('2AC', feature=None, n_features=5,\n",
" env=investing, hu=48, lr=0.0005)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1f6bbff-4f0f-4bdd-8a39-b62d4087d751",
"metadata": {},
"outputs": [],
"source": [
"agent.xp = 3 # <1>\n",
"agent.yp = 4 # <2>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "824d6c19-8142-4e49-89b4-aebd302c3115",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"episodes = 250"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2435dc88-3e67-4f9a-a9c0-e018b9394475",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%time agent.learn(episodes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dbd842b-b4f6-4ee6-bcc5-730bdcd246be",
"metadata": {},
"outputs": [],
"source": [
"agent.epsilon"
]
},
{
"cell_type": "raw",
"id": "4f12bada-99b5-4e3c-a9a2-216a89a92a2a",
"metadata": {},
"source": [
"# end::24[]"
]
},
{
"cell_type": "raw",
"id": "eaa16df1-ffa3-4db9-8951-d1290a1c73a9",
"metadata": {},
"source": [
"# tag::25[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90859de3-ca33-4d78-8f13-fd87f8ed19f9",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ba5cbc7",
"metadata": {},
"outputs": [],
"source": [
"%time agent.test(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3785aeb2-67a5-4e73-a32b-1d2aeeddee82",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios['xt'].describe()"
]
},
{
"cell_type": "raw",
"id": "9efaf486-c271-47a9-b840-1c1c14e80bf7",
"metadata": {},
"source": [
"# end::25[]"
]
},
{
"cell_type": "raw",
"id": "acf27b4e-5271-4f4a-bb38-167dd186b45f",
"metadata": {},
"source": [
"# tag::26[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d23876a8-9eae-42c4-896c-8372b03cee01",
"metadata": {},
"outputs": [],
"source": [
"n = max(agent.env.portfolios['e']) - 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5997ea73-8c7b-4e2c-8ade-5e5d77b554c3",
"metadata": {},
"outputs": [],
"source": [
"res = agent.env.portfolios[\n",
" agent.env.portfolios['e'] == n].set_index('date')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95b78b84-43fa-4996-be72-cb4b676f9e0a",
"metadata": {},
"outputs": [],
"source": [
"res['xt'].plot(lw=1, c='b')\n",
"plt.ylim(res['xt'].min() - 0.1, res['xt'].max() + 0.1)\n",
"plt.ylabel('allocation (asset 1)');\n",
"# plt.savefig('../figures/figure_08_08.png');"
]
},
{
"cell_type": "raw",
"id": "10511320-44b3-4f6d-8817-b4efc6b3dec2",
"metadata": {},
"source": [
"# end::26[]"
]
},
{
"cell_type": "raw",
"id": "74058686-1c21-4636-923b-abb9aafac50d",
"metadata": {},
"source": [
"# tag::27[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c5e9de2-e82e-4a54-91b1-2b5e2b111706",
"metadata": {},
"outputs": [],
"source": [
"res[['Xt', 'Yt', 'pv']].iloc[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "788e7c6b-5d9f-4891-ab00-02d32e17379d",
"metadata": {},
"outputs": [],
"source": [
"r = np.log(res[['Xt', 'Yt', 'pv']] /\n",
" res[['Xt', 'Yt', 'pv']].shift(1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87594fb2-c4c4-41bd-a81f-2b456eecb95e",
"metadata": {},
"outputs": [],
"source": [
"rets = np.exp(r.mean() * 252) - 1\n",
"rets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4313a90-1c13-4a6e-9600-1933f2049e64",
"metadata": {},
"outputs": [],
"source": [
"stds = r.std() * math.sqrt(252)\n",
"stds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec14b2d5-86cf-4095-96dd-d2b757dd400f",
"metadata": {},
"outputs": [],
"source": [
"rets / stds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55814e23-bdab-4776-be41-134d76619f47",
"metadata": {},
"outputs": [],
"source": [
"res[['Xt', 'Yt', 'pv']].plot(\n",
" title='PORTFOLIO VALUE',\n",
" style=['g--', 'b:', 'r-'],\n",
" lw=1, grid=True)\n",
"plt.ylabel('value');\n",
"# plt.savefig('../figures/figure_08_09.png');"
]
},
{
"cell_type": "raw",
"id": "5cce3f0b-13b9-4281-87ff-baa473c906c7",
"metadata": {},
"source": [
"# end::27[]"
]
},
{
"cell_type": "raw",
"id": "49b2755b-5a29-47fb-b418-31540cc69fcf",
"metadata": {},
"source": [
"# tag::28[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5e8b746-3587-4263-974a-2b124f24ea46",
"metadata": {},
"outputs": [],
"source": [
"values = agent.env.portfolios.groupby('e')[\n",
" ['Xt', 'Yt', 'pv_new']].last()\n",
"values.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "913ee35a-7b05-4b85-86cb-b0768626b001",
"metadata": {},
"outputs": [],
"source": [
"values.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88c8233e-c2ae-471f-8045-640ed0a34ee2",
"metadata": {},
"outputs": [],
"source": [
"((values['pv_new'] > values['Xt']) &\n",
" (values['pv_new'] > values['Yt'])).value_counts()"
]
},
{
"cell_type": "raw",
"id": "735c5ca3-49c1-4b80-a48c-d67e7bff0951",
"metadata": {},
"source": [
"# end::28[]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0ff32cb-0858-4f7b-acf5-c72e4f53dd53",
"metadata": {},
"outputs": [],
"source": [
"agent.env.portfolios.groupby(['e'])['pv_new'].last()"
]
},
{
"cell_type": "markdown",
"id": "c3207ecc-7632-4b56-b369-f5fe60ef4e27",
"metadata": {},
"source": [
"<img src=\"http://hilpisch.com/tpq_logo.png\" alt=\"The Python Quants\" width=\"35%\" align=\"right\" border=\"0\"><br>\n",
"\n",
"<a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:team@tpq.io\">team@tpq.io</a>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
#
# Deep Q-Learning Agent
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#
import os
import random
import warnings
import numpy as np
import tensorflow as tf
from tensorflow import keras
from collections import deque
from keras.layers import Dense, Flatten
from keras.models import Sequential
# Ignore every Python warning raised from here on (applies process-wide).
warnings.simplefilter('ignore')
# Ask TensorFlow's native logging to stay quiet.
# NOTE(review): tensorflow is already imported above, so this setting may
# come too late to have any effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.python.framework.ops import disable_eager_execution
# Switch TensorFlow out of eager mode before any model is built.
disable_eager_execution()
# Alias for the legacy Keras Adam optimizer class; DQLAgent._create_model
# instantiates it with the requested learning rate.
opt = keras.optimizers.legacy.Adam
class DQLAgent:
    """Deep Q-learning agent with experience replay and epsilon-greedy policy.

    The agent wraps a small dense Keras network with two linear outputs (one
    Q-value per action) and interacts with an environment that follows the
    Gymnasium-style API: ``reset()`` returns ``(state, info)`` and ``step()``
    returns ``(state, reward, done, trunc, info)``.
    """

    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
        """Set the hyperparameters and build the Q-network.

        Parameters
        ----------
        symbol, feature:
            Accepted for interface compatibility; not used by this class.
        n_features:
            Length of the flattened state vector (network input dimension).
        env:
            Environment object the agent learns from and is tested on.
        hu:
            Number of hidden units in each of the two hidden layers.
        lr:
            Learning rate passed to the optimizer.
        """
        self.epsilon = 1.0            # current exploration rate
        self.epsilon_decay = 0.9975   # multiplicative decay per replay() call
        self.epsilon_min = 0.1        # decay stops once epsilon is at/below this
        self.memory = deque(maxlen=2000)  # replay buffer of transitions
        self.batch_size = 32
        self.gamma = 0.5              # discount factor for future rewards
        self.trewards = list()        # total reward of every finished episode
        self.max_treward = -np.inf
        self.n_features = n_features
        self.env = env
        self.episodes = 0             # episodes trained so far, across calls
        self._create_model(hu, lr)

    def _create_model(self, hu, lr):
        """Build and compile the dense Q-network (two hidden ReLU layers)."""
        self.model = Sequential()
        self.model.add(Dense(hu, activation='relu',
                             input_dim=self.n_features))
        self.model.add(Dense(hu, activation='relu'))
        self.model.add(Dense(2, activation='linear'))
        self.model.compile(loss='mse', optimizer=opt(learning_rate=lr))

    def _reshape(self, state):
        """Return ``state`` flattened into a ``(1, n)`` array (batch of one)."""
        state = state.flatten()
        return np.reshape(state, [1, len(state)])

    def act(self, state):
        """Pick an action: random with probability epsilon, else greedy."""
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def replay(self):
        """Fit the network on a random batch from memory, then decay epsilon.

        For every sampled transition the target for the taken action is the
        reward plus, for non-terminal transitions, the discounted maximum
        predicted Q-value of the next state.
        """
        batch = random.sample(self.memory, self.batch_size)
        for state, action, next_state, reward, done in batch:
            if not done:
                reward += self.gamma * np.amax(
                    self.model.predict(next_state)[0])
            target = self.model.predict(state)
            target[0, action] = reward
            self.model.fit(state, target, epochs=1, verbose=False)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def learn(self, episodes):
        """Train for ``episodes`` episodes, replaying once after each one.

        An episode ends when the environment reports termination (``done``)
        or truncation (``trunc``); only ``done`` is stored in replay memory
        so that truncated transitions still bootstrap from the next state.
        Each episode is capped at 4999 steps.
        """
        for _ in range(episodes):
            self.episodes += 1
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for f in range(1, 5000):
                self.f = f  # expose the current step count on the agent
                action = self.act(state)
                next_state, reward, done, trunc, _ = self.env.step(action)
                treward += reward
                next_state = self._reshape(next_state)
                self.memory.append(
                    [state, action, next_state, reward, done])
                state = next_state
                # A truncated environment must be reset before further
                # steps, so truncation ends the episode as well.
                if done or trunc:
                    self.trewards.append(treward)
                    self.max_treward = max(self.max_treward, treward)
                    templ = f'episode={self.episodes:4d} | '
                    templ += f'treward={treward:7.3f}'
                    templ += f' | max={self.max_treward:7.3f}'
                    print(templ, end='\r')
                    break
            if len(self.memory) > self.batch_size:
                self.replay()
        print()

    def test(self, episodes, min_accuracy=0.0,
             min_performance=0.0, verbose=True,
             full=True):
        """Run greedy (no exploration) episodes and report the results.

        The environment's ``min_accuracy`` (and ``min_performance``, when the
        environment has that attribute) are temporarily replaced by the given
        values and are always restored afterwards, even if an episode raises
        or the run is interrupted.

        Parameters
        ----------
        episodes:
            Number of test episodes to run.
        min_accuracy, min_performance:
            Temporary values for the environment attributes of the same name.
        verbose:
            Print one result line per finished episode.
        full:
            When printing, keep every line (``True``) or overwrite the
            previous one with a carriage return (``False``).

        Side effects
        ------------
        ``self.performances`` is reset and filled with ``env.performance``
        of every finished episode (only for environments that expose
        ``min_performance``). The figure printed as ``total reward`` is the
        number of steps taken in the episode.
        """
        has_performance = hasattr(self.env, 'min_performance')
        saved_accuracy = self.env.min_accuracy
        self.env.min_accuracy = min_accuracy
        if has_performance:
            saved_performance = self.env.min_performance
            self.env.min_performance = min_performance
        self.performances = list()
        try:
            for _ in range(episodes):
                state, _ = self.env.reset()
                state = self._reshape(state)
                for f in range(1, 5001):
                    action = np.argmax(self.model.predict(state)[0])
                    state, reward, done, trunc, _ = self.env.step(action)
                    state = self._reshape(state)
                    # Truncation ends the episode just like termination.
                    if done or trunc:
                        templ = f'total reward={f:4d} | '
                        templ += f'accuracy={self.env.accuracy:.3f}'
                        if hasattr(self.env, 'min_performance'):
                            self.performances.append(self.env.performance)
                            templ += f' | performance={self.env.performance:.3f}'
                        if verbose:
                            if full:
                                print(templ)
                            else:
                                print(templ, end='\r')
                        break
        finally:
            # Put the environment back exactly as it was found.
            self.env.min_accuracy = saved_accuracy
            if has_performance:
                self.env.min_performance = saved_performance
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment