Last active
September 7, 2022 21:43
-
-
Save femtotrader/e0c89aeac24c1850a73673aef21b98ee to your computer and use it in GitHub Desktop.
Read Lobster orderbook files to Python Pandas DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Read Lobster orderbook files to Python Pandas DataFrame | |
""" | |
import click | |
import os | |
import pandas as pd | |
from pandas.io.common import ZipFile | |
from pandas.compat import StringIO | |
""" | |
data = '~/data' | |
ticker = 'AAPL' | |
dt = '2012-06-21' | |
level = 5 | |
""" | |
# LOBSTER files store prices as the dollar price times 10000 (e.g. $91.14 is
# stored as 911400); raw price columns are divided by this factor.
PRICE_MULT = 10000
@click.command()
@click.option('--data', default='~/data', help='Data directory')
@click.option('--ticker', default='AAPL', help='Ticker')
@click.option('--dt', default='2012-06-21', help='Datetime')
@click.option('--level', default=5, help='Level')
def main(data, ticker, dt, level):
    """Load the LOBSTER message and orderbook files of one ticker and day."""
    # The archive is expected at
    # <data>/lobster/<YYYY>/<YYYY-MM>/<YYYY-MM-DD>/<ticker>/
    #     LOBSTER_SampleFile_<ticker>_<YYYY-MM-DD>_<level>.zip
    data = os.path.expanduser(data)
    dt = pd.to_datetime(dt)
    day = dt.strftime("%Y-%m-%d")
    fname = os.path.join(
        data, "lobster", dt.strftime("%Y"), dt.strftime("%Y-%m"), day, ticker,
        "LOBSTER_SampleFile_{ticker}_{dt_day}_{level}.zip".format(
            ticker=ticker, dt_day=day, level=level)
    )
    print("Load orderbook %s %s with level=%d" % (ticker, dt, level))

    # Report a missing archive as a regular CLI error instead of a traceback.
    if not os.path.isfile(fname):
        raise click.ClickException("Archive not found: %s" % fname)

    with ZipFile(fname, 'r') as zf:
        # Members are recognised by a case-insensitive substring of their
        # name; 'message' takes precedence over 'orderbook', and when several
        # members match the last one wins.
        fn_message = ''
        fn_orderbook = ''
        for fn in zf.namelist():
            fn_low = fn.lower()
            if 'message' in fn_low:
                fn_message = fn
            elif 'orderbook' in fn_low:
                fn_orderbook = fn

        # Fail with a clear message rather than letting zf.open('') raise an
        # opaque KeyError further down.
        missing = [
            kind
            for kind, member in (('message', fn_message),
                                 ('orderbook', fn_orderbook))
            if not member
        ]
        if missing:
            raise click.ClickException(
                "No %s file found in %s" % (" / ".join(missing), fname))

        # Both frames are loaded but not used any further yet.
        df_message = message(zf, fn_message, dt)
        df_orderbook = orderbook(zf, fn_orderbook, level)
        # TODO: combine them side by side, e.g.
        #   pd.concat([df_message, df_orderbook], axis=1)
def message(zf, fn, dt):
    """
    Read a LOBSTER message file from an open zip archive into a DataFrame.

    Columns of the file:

    Time: Seconds after midnight with decimal precision of at least
        milliseconds and up to nanoseconds depending on the period requested
    Event Type:
        1: Submission of a new limit order
        2: Cancellation (partial deletion of a limit order)
        3: Deletion (total deletion of a limit order)
        4: Execution of a visible limit order
        5: Execution of a hidden limit order
        7: Trading halt indicator
    Order ID: Unique order reference number
    Size: Number of shares
    Price: Dollar price times 10000 (i.e. a stock price of $91.14 is given
        by 911400)
    Direction:
        -1: Sell limit order
        1: Buy limit order
        Note: Execution of a sell (buy) limit order corresponds to a buyer
        (seller) initiated trade, i.e. buy (sell) trade.

    Args:
        zf: Open archive object providing ``open(name)`` (main passes a
            ZipFile).
        fn: Name of the message member inside the archive.
        dt: Day the file belongs to; the seconds-after-midnight offsets are
            added to it (main passes a pandas Timestamp).

    Returns:
        DataFrame with the six columns above, where ``Time`` is ``dt`` plus
        the offset and ``Price`` has been divided by PRICE_MULT.
    """
    columns = ['Time', 'Event Type', 'Order ID', 'Size', 'Price', 'Direction']
    # Parse straight from the archive member: the handle is closed when the
    # block exits and the file is not first copied into a decoded string.
    with zf.open(fn) as fh:
        df = pd.read_csv(fh, names=columns)
    df['Time'] = pd.to_timedelta(df['Time'], unit='s') + dt
    df['Price'] = df['Price'] / PRICE_MULT
    return df
def orderbook(zf, fn, level):
    """
    Read a LOBSTER orderbook file from an open zip archive into a DataFrame.

    The file has four columns per level, ordered level by level:

    Ask Price 1: Level 1 ask price (best ask price)
    Ask Size 1: Level 1 ask volume (best ask volume)
    Bid Price 1: Level 1 bid price (best bid price)
    Bid Size 1: Level 1 bid volume (best bid volume)
    Ask Price 2: Level 2 ask price (second best ask price)
    Ask Size 2: Level 2 ask volume (second best ask volume)
    ...

    Args:
        zf: Open archive object providing ``open(name)`` (main passes a
            ZipFile).
        fn: Name of the orderbook member inside the archive.
        level: Number of levels; determines the ``4 * level`` column names.

    Returns:
        DataFrame with the columns above, where every price column has been
        divided by PRICE_MULT.
    """
    # ["Ask Price 1", "Ask Size 1", "Bid Price 1", "Bid Size 1",
    #  "Ask Price 2", "Ask Size 2", "Bid Price 2", "Bid Size 2", ...]
    names = [
        "%s %d" % (field, i)
        for i in range(1, level + 1)
        for field in ("Ask Price", "Ask Size", "Bid Price", "Bid Size")
    ]
    # Parse straight from the archive member: the handle is closed when the
    # block exits and the file is not first copied into a decoded string.
    with zf.open(fn) as fh:
        df = pd.read_csv(fh, names=names)
    # Only the price columns are rescaled; the size columns stay untouched.
    for name in names:
        if "Price" in name:
            df[name] = df[name] / PRICE_MULT
    return df
# Run the click command only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ToDo: use enum / categorical for message Event Type, Direction
Time could be the index (but ideally it should then have unique values), see:
http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154
Concatenate (horizontally) DataFrames
Streaming approach (without Pandas) to avoid loading the whole content into memory