Using Python context managers to make DuckDB's register/unregister functionality easy to use.
"""DuckDB utilities.""" | |
import duckdb | |
class _RegisteredView: | |
def __init__(self, ddb, name, obj): | |
self.ddb = ddb | |
self.name = name | |
self.obj = obj | |
def __enter__(self): | |
self.ddb.register(self.name, self.obj) | |
return self | |
def __exit__(self, exc_type, exc_val, exc_tb): | |
self.ddb.unregister(self.name) | |
class _MultiRegisteredView: | |
def __init__(self, ddb, views): | |
self.ddb = ddb | |
self.views = views | |
def __enter__(self): | |
for name, obj in self.views: | |
self.ddb.register(name, obj) | |
return self | |
def __exit__(self, exc_type, exc_val, exc_tb): | |
for name, _ in self.views: | |
self.ddb.unregister(name) | |
class _Txn: | |
def __init__(self, ddb): | |
self.ddb = ddb | |
def __enter__(self): | |
self.ddb.begin() | |
return self | |
def __exit__(self, exc_type, exc_val, exc_tb): | |
if exc_type is None: | |
self.ddb.commit() | |
else: | |
self.ddb.rollback() | |
class Connection: | |
"""DuckDB connection wrapper.""" | |
_ddb: duckdb.DuckDBPyConnection | |
def __init__(self, ddb: duckdb.DuckDBPyConnection): | |
self._ddb = ddb | |
def connect( | |
database: str = ":memory:", | |
read_only: bool = False, | |
config: dict = None, | |
/, | |
) -> "Connection": | |
if config is None: | |
ddb = duckdb.connect(database, read_only) | |
else: | |
ddb = duckdb.connect(database, read_only, config) | |
return Connection(ddb) | |
def view(self, name: str, obj): | |
"""Register a view to a Python object on the DuckDB connection.""" | |
return _RegisteredView(self._ddb, name, obj) | |
def views(self, views: list): | |
"""Register multiple views to Python objects on the DuckDB connection.""" | |
return _MultiRegisteredView(self._ddb, views) | |
def txn(self): | |
return _Txn(self._ddb) | |
def execute(self, query, parameters=None, multiple_parameter_sets: bool = False): | |
return self._ddb.execute(query, parameters, multiple_parameter_sets) |
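For context, here is a minimal usage sketch of the wrapper above, assuming the file is saved as ddb.py (the Sharpe Ratio file below imports it under that name). The table name "my_table" and the values are placeholders, not part of the gist:

import pyarrow as pa

import ddb

db = ddb.Connection.connect()  # in-memory database by default

table = pa.table({"x": [1, 2, 3]})

# The view is registered on __enter__ and unregistered on __exit__,
# so it never outlives the block.
with db.view("my_table", table):
    total = db.execute("SELECT SUM(x) FROM my_table").fetchall()

# Transactions follow the same pattern: commit on success, rollback on error.
with db.txn():
    db.execute("CREATE TABLE t (x INTEGER)")
    db.execute("INSERT INTO t VALUES (1), (2)")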
"""Sharpe Ratio computation.""" | |
import pyarrow as pa | |
import ddb | |
from datetime import timedelta | |
ROLLING_SHARPE_RATIO_COLUMNS = set( | |
[ | |
"date", | |
"rf_rate", | |
"rf_log1p", | |
"log_return", | |
"excess_log_return", | |
"simple_return", | |
"excess_simple_return", | |
"window_start_date", | |
"window_trading_days", | |
"rolling_annual_log_return", | |
"rolling_annual_excess_log_return", | |
"rolling_annual_log_stddev", | |
"rolling_annual_volatility", | |
"rolling_sharpe_ratio", | |
] | |
) | |
def rolling_sharpe_ratio( | |
window: timedelta, | |
rolling_rf_rate: pa.Table, | |
returns: pa.Table, | |
requested_columns: list = ["date", "log_return", "rolling_sharpe_ratio"], | |
trading_days_per_year=252, | |
): | |
"""Compute the rolling Sharpe Ratio based on tables of returns and risk-free rates. | |
Parameters | |
---------- | |
window: timedelta | |
Rolling window size. | |
rolling_rf_rate: pa.Table | |
Table with annualized rolling risk-free rates, ideally for the same rolling | |
window. | |
date log1p rate | |
0 2023-12-29 0.025901 0.026240 | |
1 2024-01-05 0.025473 0.025800 | |
2 2024-01-12 0.024895 0.025207 | |
3 2024-01-19 0.024358 0.024658 | |
4 2024-01-26 0.023843 0.024130 | |
... ... ... | |
13 2024-03-29 0.022256 0.022506 | |
14 2024-04-05 0.022578 0.022835 | |
The risk-free rates table can be more sparse than the table of returns | |
but it should have the same start date of the earliest return date or | |
earlier. | |
returns: pa.Table | |
Table with date and daily log returns. | |
date log_return | |
0 2024-01-02 0.003139 | |
1 2024-01-03 0.000209 | |
2 2024-01-04 -0.009490 | |
3 2024-01-05 0.000121 | |
4 2024-01-08 0.013832 | |
... ... | |
61 2024-04-02 -0.009725 | |
62 2024-04-03 -0.004430 | |
63 2024-04-04 -0.014380 | |
64 2024-04-05 0.011506 | |
65 2024-04-08 -0.007358 | |
66 2024-04-09 0.001761 | |
Returns | |
------- | |
pa.Table | |
Table with the requested columns. | |
date log_return rolling_sharpe_ratio | |
0 2024-04-02 -0.009725 4.673809 | |
1 2024-04-03 -0.004430 4.460766 | |
2 2024-04-04 -0.014380 4.168770 | |
3 2024-04-05 0.011506 4.529497 | |
4 2024-04-08 -0.007358 3.773462 | |
5 2024-04-09 0.001761 3.731398 | |
""" | |
db = ddb.Connection.connect() | |
for col in requested_columns: | |
if col not in ROLLING_SHARPE_RATIO_COLUMNS and col != "*": | |
raise ValueError(f"Invalid requested column: {col}") | |
r = len(requested_columns) | |
if r == 0 or (r > 0 and requested_columns[0] != "date"): | |
requested_columns = ["date"] + requested_columns | |
requested_columns_str = ", ".join(requested_columns) | |
sql_query = f""" | |
WITH time_series AS ( | |
SELECT | |
"returns".date, | |
rf.rate rf_rate, | |
rf.log1p rf_log1p, | |
"returns".log_return, | |
FROM "returns" | |
ASOF JOIN rolling_rf_rate rf ON "returns".date >= rf.date | |
ORDER BY "returns".date | |
), | |
rolling AS ( | |
SELECT | |
date, | |
-- annualized risk-free rate | |
rf_rate, | |
rf_log1p, | |
-- daily returns and excess returns | |
log_return, | |
log_return - (rf_log1p / $trading_days_per_year) AS excess_log_return, | |
EXP(log_return) - 1 AS simple_return, | |
EXP(excess_log_return) - 1 AS excess_simple_return, | |
-- window offset and dimension | |
first_value(date) OVER rolling_window AS window_start_date, | |
COUNT(*) OVER rolling_window AS window_trading_days, | |
-- annualized values based on log returns | |
(SUM(log_return) OVER rolling_window * $trading_days_per_year) / | |
window_trading_days AS rolling_annual_log_return, | |
rolling_annual_log_return - rf_log1p AS rolling_annual_excess_log_return, | |
(STDDEV_POP(excess_log_return) OVER rolling_window) * | |
SQRT($trading_days_per_year) AS rolling_annual_log_stddev, | |
-- rolling volatility as a simple ratio | |
EXP(rolling_annual_log_stddev) - 1 AS rolling_annual_volatility, | |
-- sharpe ratio | |
rolling_annual_excess_log_return / rolling_annual_log_stddev AS rolling_sharpe_ratio, | |
FROM time_series | |
WINDOW rolling_window AS ( | |
ORDER BY date RANGE BETWEEN $window_m1 PRECEDING AND CURRENT ROW | |
) | |
) | |
SELECT {requested_columns_str} FROM rolling | |
WHERE | |
-- exclude the first window that may not have enough data | |
date >= (SELECT MIN(date) FROM rolling) + $window_m1 | |
""" | |
with db.views([("rolling_rf_rate", rolling_rf_rate), ("returns", returns)]): | |
return db.execute( | |
sql_query, | |
{ | |
"window_m1": window - timedelta(days=1), | |
"trading_days_per_year": trading_days_per_year, | |
}, | |
).fetch_arrow_table() |
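And a usage sketch for rolling_sharpe_ratio, assuming the function above is importable or that this snippet is appended to the same file. The input values are fabricated placeholders shaped like the docstring examples, not real market data:

from datetime import date, timedelta

import pyarrow as pa

# Sparse risk-free rate table: date, log1p, rate (values made up for illustration).
rolling_rf_rate = pa.table(
    {
        "date": [date(2024, 1, 5), date(2024, 3, 1)],
        "log1p": [0.0255, 0.0249],
        "rate": [0.0258, 0.0252],
    }
)
# Daily log returns starting on the first risk-free rate date (also made up).
returns = pa.table(
    {
        "date": [date(2024, 1, 5) + timedelta(days=i) for i in range(120)],
        "log_return": [0.001 * ((i % 5) - 2) for i in range(120)],
    }
)

result = rolling_sharpe_ratio(
    timedelta(days=91),  # ~3-month rolling window
    rolling_rf_rate,
    returns,
    requested_columns=["date", "rolling_annual_volatility", "rolling_sharpe_ratio"],
)
print(result.to_pandas())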