Created
July 13, 2011 01:50
-
-
Save wesm/1079577 to your computer and use it in GitHub Desktop.
Summarizing an Interactive Brokers activity statement
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A little script for summarizing my Interactive Broker statements | |
""" | |
from pandas import * | |
from pandas.util.testing import set_trace as st | |
import numpy as np | |
from BeautifulSoup import BeautifulSoup | |
def get_table_by_id(tables): | |
table_dict = {} | |
for table in tables: | |
name = _get_attr(table, 'id') | |
if name: | |
table_dict[name] = table | |
return table_dict | |
def _get_attr(elt, needle): | |
for kind, value in elt.attrs: | |
if kind == needle: | |
return value | |
return None | |
def scrape_perf_table(perf_table): | |
rows = perf_table.findAll('tr') | |
colnames = ['Symbol', 'Description', | |
'MTM MTD', 'MTM YTD', | |
'Realized ST MTD', 'Realized ST YTD', | |
'Realized LT MTD', 'Realized LT YTD'] | |
all_data = [] | |
for row in rows: | |
entries = row.findAll('td') | |
if len(entries) != 11: | |
continue | |
row_data = [] | |
for entry in entries: | |
if _get_attr(entry, 'class') == 'td_spacer': | |
continue | |
row_data.append(entry.text.replace(',', '')) | |
all_data.append(row_data) | |
converted = {} | |
for name, col in zip(colnames, zip(*all_data)): | |
try: | |
converted[name] = np.array(col, dtype=float) | |
except: | |
converted[name] = np.array(col) | |
return DataFrame(converted, columns=colnames) | |
def _get_underlying(symbol): | |
splt = symbol.split() | |
sym = splt[0] | |
return sym | |
if len(splt) > 1: | |
sym = '%s Option' % sym | |
return sym | |
def _get_kind(symbol): | |
return 'Option' if len(symbol.split()) == 2 else 'Stock' | |
def get_perf_table(path): | |
soup = BeautifulSoup(open(path).read()) | |
body = soup.html.body | |
tables = body.findAll('table') | |
tables_by_id = get_table_by_id(tables) | |
perf_table = tables_by_id['tblMTDYTDPerfSum'] | |
frame = scrape_perf_table(perf_table) | |
frame['Underlying'] = frame['Symbol'].map(_get_underlying) | |
frame['Kind'] = frame['Symbol'].map(_get_kind) | |
return frame | |
def get_grouped_pl(path): | |
frame = get_perf_table(path) | |
mtm = {} | |
realized = {} | |
for kind, group in frame.groupby('Kind'): | |
mtm_grouped = group['MTM YTD'].groupby(group['Underlying']).sum().order() | |
realized_grouped = group['Realized ST YTD'].groupby(group['Underlying']).sum().order() | |
mtm[kind] = mtm_grouped | |
realized[kind] = realized_grouped | |
mtm = DataFrame(mtm).fillna(0) | |
realized = DataFrame(realized).fillna(0) | |
return mtm, realized | |
mtm_2011, realized_2011 = get_grouped_pl('ActivityStatement.html') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment