Last active
March 8, 2021 01:51
-
-
Save robodhruv/3eedd86d91fa89d86d896248e2337358 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Convert Robinhood Account Summary (CSV) to a Transaction Summary (CSV) you can use with a tax software like Glacier (or TaxAct, SprinTax etc. | |
Coupled with (https://github.com/JiahaoShan/Glacier-tax-1099-B-Stock-Transactions-Helper), use this script to autofill Robinhood transaction summary for your 1099-B tax docs on Glacier Tax Prep (http://glaciertax.com). | |
NOTE: This can manually handle stock splits, but does not account for referral stocks. You might have to handle them externally. | |
""" | |
from collections import deque | |
import pandas as pd | |
import numpy as np | |
import csv | |
def add_index(df): | |
df["id"] = range(len(df)) | |
return df | |
def filter_df(df, trans_codes=["BUY", "SELL"], date_filt=None): | |
# filter dataframe by transaction codes and date range | |
df_filt = df[df["Trans Code"].isin(trans_codes)] | |
if date_filt: | |
# filter by start and end ID | |
id_low = df_filt["id"] >= date_filt[0] | |
id_high = df_filt["id"] <= date_filt[1] | |
df_filt = df_filt[id_low & id_high] | |
return df_filt | |
def preprocess_dollars(df): | |
# convert $ strings to unsigned floats for "Price", "Amount" and "Quantity" | |
df["Price"] = df["Price"].replace('[\$\,]',"",regex=True).astype(float) | |
df["Amount"] = df["Amount"].replace('[\$\,\)\(]',"",regex=True).astype(float) | |
df["Quantity"] = df["Quantity"].astype(float) | |
return df | |
def adjust_stock_split(df, ticker, ID, ratio): | |
# adjust Qty and Price for stock splits (not very clean) | |
new_df = df | |
is_ticker = df["Instrument"]==ticker | |
is_older = df["id"]>ID | |
is_update = is_older & is_ticker | |
df.loc[is_update, "Quantity"] *= ratio | |
df.loc[is_update, "Price"] /= ratio | |
return df | |
def create_assetwise_dict(df): | |
assets = {} | |
tickers = pd.unique(df["Instrument"]) | |
for ticker in tickers: | |
assets[ticker] = df[df["Instrument"]==ticker] | |
return assets | |
def generate_transactions(df): | |
# generate BUY-SELL paired transactions from df of an asset | |
df = df.reindex(index=df.index[::-1]) | |
transactions = [] # [name, acquired, sold, proceeds, cost] | |
buy_orders = deque() | |
for idx, row in df.iterrows(): | |
ticker = row["Instrument"] | |
if row["Trans Code"] == "BUY": | |
# populate the queue | |
buy_orders.append(idx) | |
if row["Trans Code"] == "SELL": | |
# execute SELL routine | |
sell_amount = row["Amount"] | |
sell_qty = abs(row["Quantity"]) | |
while len(buy_orders) > 0: | |
curr_id = buy_orders[0] | |
order1 = df.loc[curr_id] | |
if order1["Quantity"] <= sell_qty: | |
# need more orders | |
curr_id = buy_orders.popleft() | |
order1 = df.loc[curr_id] | |
order_qty = order1["Quantity"] | |
transactions.append([ticker, order1["Activity Date"], row["Activity Date"], order_qty*row["Price"], order_qty*order1["Price"]]) | |
sell_qty -= order_qty | |
else: | |
# trim the first order | |
order_qty = sell_qty | |
transactions.append([ticker, order1["Activity Date"], row["Activity Date"], order_qty*row["Price"], order_qty*order1["Price"]]) | |
df.loc[curr_id, "Quantity"] = df.loc[curr_id, "Quantity"] - order_qty | |
df.loc[curr_id, "Amount"] = df.loc[curr_id, "Quantity"]*df.loc[curr_id, "Price"] | |
sell_qty = 0 | |
if sell_qty < 1e-9: break | |
if not sell_qty < 1e-9: | |
print(sell_qty) | |
raise Exception("Insufficient buy orders") | |
return transactions | |
def verify_buy_sell(asset_tran): | |
# verify gross buy-sell prices for debugging (true values available on form 1099) | |
if len(asset_tran) == 0: | |
return "No Buy Orders" | |
sell = np.array(asset_tran)[:, 3].astype(np.float64) | |
buy = np.array(asset_tran)[:, 4].astype(np.float64) | |
return(np.sum(buy), np.sum(sell)) | |
activity = pd.read_csv("activity.csv") # upload your Robinhood account statement CSV files (you can request this over email) | |
# activity.keys() | |
## ['Activity Date', 'Process Date', 'Settle Date', 'Account Type', | |
## 'Instrument', 'Description', 'Trans Code', 'Quantity', 'Price', | |
## 'Amount', 'Suppressed'] | |
activity = add_index(activity) | |
activity_stocks = preprocess_dollars(filter_df(activity, date_filt=[112, 1231])) # filter by financial year date range by providing IDs of first and last transaction in the year | |
activity_stocks = adjust_stock_split(activity_stocks, "AAPL", 536, 4) # find and specify the ID of a stock split and its ratio (e.g. AAPL had a split in Aug 2021; can be automated or set by date) | |
asset_dict = create_assetwise_dict(activity_stocks) | |
all_transactions = [] | |
for ticker in asset_dict.keys(): | |
print(ticker) | |
asset_tran = generate_transactions(asset_dict[ticker]) | |
print(verify_buy_sell(asset_tran)) | |
all_transactions.extend(asset_tran) | |
all_transactions.insert(0, ["name", "acquired", "sold", "proceeds", "cost"]) | |
with open("transactions.csv", "w", newline="") as f: | |
writer = csv.writer(f) | |
writer.writerows(all_transactions) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment