Last active
March 6, 2025 16:31
-
-
Save databento-bot/9fbcd43ccd8077f2d8b92fba7b40fa31 to your computer and use it in GitHub Desktop.
Demonstrate adverse selection and market impact of aggressive/passive limit orders in US equities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import databento as db | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
# Instrument and session to analyse.
SYMBOL = 'NVDA'
DATE = pd.Timestamp(year=2023, month=6, day=22, tz='US/Eastern')

# Markout horizon on each side of an event, in microseconds (120 seconds).
WINDOW_LIMITS_US = 120e6
# Number of sample points generated on each side of the event time.
NUM_TIME_SAMPLES = 1_000
def markout(mbp_data, events, offsets, aggressor=False):
    """Compute the mean markout curve of a set of trade events.

    For every event, the midprice in effect at ``event time + offset`` is
    looked up (forward-filled from the most recent book update), the event
    price is subtracted, and the sign is chosen so that positive values are
    a gain for the side being analysed. The per-event curves are then
    averaged, ignoring NaNs.

    Parameters
    ----------
    mbp_data : pandas.DataFrame
        Book data with an index level named ``ts_event`` and a ``midprice``
        column.
    events : pandas.DataFrame
        Trade events indexed by timestamp, with ``price`` and ``side``
        columns (``side`` compared against ``'A'`` / ``'B'``).
    offsets : pandas.TimedeltaIndex
        Time offsets relative to each event at which to sample the midprice.
    aggressor : bool, default False
        If True the sign is flipped for ``side == 'A'`` events, otherwise
        for ``side == 'B'`` events.

    Returns
    -------
    pandas.DataFrame
        Single-column frame indexed by offset in microseconds holding the
        mean markout multiplied by 1e4. All-NaN when ``events`` is empty.
    """
    # Only the midprice is needed. Keeping the last value per timestamp
    # yields the unique, sorted index that reindex(method='ffill') requires.
    midprice = mbp_data['midprice'].groupby(level='ts_event').last()

    # Side label whose markout must be negated: with aggressor=False a 'B'
    # event is treated as a passive sell; with aggressor=True an 'A' event
    # is negated instead.
    flip_side = 'A' if aggressor else 'B'

    results = np.empty(shape=(len(events), len(offsets), 1))
    rows = zip(events.index, events['price'], events['side'])
    for i, (ts_event, price, side) in enumerate(rows):
        markouts = midprice.reindex(offsets + ts_event, method='ffill') - price
        if side == flip_side:
            markouts = markouts * -1
        # Scale price differences by 1e4 (plotted as "mils" by the caller).
        results[i] = (markouts * 1e4).to_numpy().reshape(-1, 1)

    if len(events) == 0:
        # Avoid numpy's "Mean of empty slice" warning; same all-NaN result.
        mean_markouts = np.full((len(offsets), 1), np.nan)
    else:
        # Average across events (axis 0), leaving one value per offset.
        mean_markouts = np.nanmean(results, axis=0)

    return pd.DataFrame(
        data=mean_markouts,
        index=offsets.total_seconds() * 1e6,
    )
def plot_markouts(events, lt_thresholds=(100,), gt_thresholds=(100, 200), aggressor=True, mbp_data=None):
    """Plot mean markout curves for events bucketed by order size.

    One curve is drawn for each ``size < threshold`` bucket in
    ``lt_thresholds`` followed by one for each ``size >= threshold`` bucket
    in ``gt_thresholds``. Sample offsets are geometrically spaced on both
    sides of the event time, out to ``WINDOW_LIMITS_US`` microseconds.

    Parameters
    ----------
    events : pandas.DataFrame
        Trade events with a ``size`` column, plus whatever ``markout`` needs.
    lt_thresholds : sequence of int
        Upper (exclusive) size bounds; one curve each.
    gt_thresholds : sequence of int
        Lower (inclusive) size bounds; one curve each.
    aggressor : bool, default True
        Forwarded to ``markout``; also selects 'trades' vs 'fills' in the
        plot title.
    mbp_data : pandas.DataFrame, optional
        Book data passed to ``markout``. When omitted, the module-level
        ``df`` created by the script entry point is used, as before.
    """
    if mbp_data is None:
        # Backward compatibility: earlier versions always read the global
        # `df` built under `if __name__ == '__main__'`.
        mbp_data = df

    # Symmetric, geometrically spaced offsets: -samples, 0, +samples.
    samples = np.geomspace(1e-3, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
    offsets = pd.to_timedelta(
        np.concatenate((-samples[::-1], [0.0], samples)),
        unit="microseconds",
    )
    # Alternative: linear time steps instead
    # samples = np.linspace(-WINDOW_LIMITS_US, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
    # offsets = pd.to_timedelta(samples, unit="microseconds")

    # (legend prefix, row mask) per curve: below-threshold buckets first,
    # then at-or-above-threshold buckets, preserving the plotting order.
    buckets = [
        (f"order size < {threshold:d}", events["size"] < threshold)
        for threshold in lt_thresholds
    ]
    buckets += [
        (f"order size >= {threshold:d}", events["size"] >= threshold)
        for threshold in gt_thresholds
    ]

    for prefix, mask in buckets:
        events_qual = events[mask]
        plt.plot(
            markout(mbp_data, events_qual, offsets, aggressor=aggressor),
            drawstyle='steps-post',
            label=f"{prefix} (n={len(events_qual):,d})",
        )

    # Annotations
    plt.title(f"Databento/XNAS.ITCH - {SYMBOL} {'trades' if aggressor else 'fills'} - {DATE.date()}")
    plt.xlabel("Time since event (μs)")
    plt.ylabel("PnL/share (mils)")
    # symlog keeps the negative (pre-event) offsets on a log-like axis.
    plt.xscale("symlog")
    plt.grid(True)
    plt.legend()
    plt.show()
if __name__ == '__main__':
    # Download MBP-1 data for SYMBOL covering the 24 hours starting at DATE.
    client = db.Historical()
    mbp_data = client.timeseries.get_range(
        dataset="XNAS.ITCH",
        schema="mbp-1",
        symbols=SYMBOL,
        start=DATE,
        end=DATE + pd.Timedelta(days=1),
    )

    # Drop incomplete rows and index by event time; `markout` groups on the
    # `ts_event` index level. `plot_markouts` reads this module-level `df`.
    df = mbp_data.to_df().dropna().set_index("ts_event")

    # L1 midprice: average of best bid and best ask.
    df["midprice"] = df[["bid_px_00", "ask_px_00"]].mean(axis=1)

    # Trade actions that carry a side ('N' rows are excluded).
    is_trade = df["action"] == "T"
    has_side = df["side"] != "N"
    trades = df[is_trade & has_side]

    plot_markouts(trades, aggressor=False)

    # Author's note: Nasdaq executions are reported on the passive side.
    # Rows sharing a timestamp and action are merged into one event with
    # summed size and size-weighted average price before the aggressor plot
    # (presumably to approximate the aggressing order; confirm with data docs).
    trades = trades.assign(cost=trades['price'] * trades['size'])
    agg_spec = {'size': 'sum', 'side': 'first', 'cost': 'sum'}
    agg_trades = (
        trades.reset_index()
        .groupby(['ts_event', 'action'], as_index=False)
        .agg(agg_spec)
    )
    agg_trades = agg_trades.assign(
        price=agg_trades['cost'] / agg_trades['size'],
    ).set_index('ts_event')

    plot_markouts(agg_trades, aggressor=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Passive fills

Aggressive fills
