from typing import List
from datetime import timedelta, datetime

import streamlit as st
import pandas as pd
IGNORE_DESCRIPTIONS = [
    "Internal Transfer",
    "International Transaction Fee",
]
RENT = [
    "Woodards Norther",  # Rent Carlton
    # Rent Rose street
    "NELS RENT",
    "Rose St Rent",
    "DEFT RENT",
    "ASM - Direct Debit",
    # House moving
    "FANTASTIC SERVICES",  # Cleaners
    "MAN WITH A VAN",
]
HEALTH_INSURANCE = [
    "DOCTORS' HEALTH",
    "DOCTORS HEALTH",
    "CARLTON FAMILY MEDIC",
    "SCHNEIER PHARMACY",
]
BILLS = [
    "AUSSIE BROADBAND",
    "TPG Internet PTY",
    "EnergyAustralia",
    "ORIGIN ENERGY",
    "GWW - ",
    "AMBER ELECTRIC",
]
LIQUOR = [
    "BWS LIQUOR",
    "DAN MURPHY'S",
]
CHILDCARE = [
    "Fitzroy ELC",
]
GROCERIES = [
    "WOOLWORTHS",
    "COLES",
    "AMAZON MARKETPLACE",
    "AMAZON AU",
    "SUPA IGA",
    "ALDI STORES",
    "IGA EXPRESS",
    "IGA FITZROY",
]
SHOPPING = [
    "Uniqlo Australia",
    "KMART",
    "OFFICEWORKS",
    "CWH ",
    "BUNNINGS ",
    "IKEA PTY LTD",
    "IKEA RICHMOND",
]
COFFEE = [
    "ASSEMBLY STORE",
    "D O C ESPRESSO",
    "Industry Beans Fitzroy",
    "INDUSTRYBEANS",
    "GOOD MEASURE",
    "MARKET LANE COFFEE",
]
TAKEOUT = [
    "UBER* EATS",
    "UBER *EATS",
    "7-ELEVEN",
    "YO CHI",
    "GRILLD",
    "PAVLOVS DUCK",
    "Dominos",
    "Nasi Lemak House",
    "MCDONALDS",
    "Tamago Sushi",
]
TRANSPORT = [
    "UBER* TRIP",
    "UBER *TRIP",
]
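# Each category below maps an output column name to a list of description
# substrings. Matching (see add_column_with_matching further down) is a
# case-insensitive, plain-substring match, so entries only need to cover the
# distinctive part of a transaction description.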
CATEGORIES = {
    "rent": RENT,
    "groceries": GROCERIES + SHOPPING,
    "non-essentials": COFFEE + TAKEOUT + LIQUOR + TRANSPORT,
    "childcare": CHILDCARE,
    "bills": BILLS + HEALTH_INSURANCE,
}

CSV_FILE = "data/ing-transactions-20-03-2023.csv"
"""
sample rows
Date,Description,Credit,Debit,Balance
27/04/2023,AHOY ROY FISH N CHIPS - Visa Purchase - Receipt 107092In ALBERT PARK Date 25 Apr 2023 Card xxxxx,,-58.85,2765.92
27/04/2023,COPP PARKING - Visa Purchase - Receipt 107091In ST KILDA Date 25 Apr 2023 Card xxxxx,,-11.02,2824.77
27/04/2023,D O C ESPRESSO - Visa Purchase - Receipt 107090In CARLTON Date 25 Apr 2023 Card xxxxx,,-67.08,2835.79
"""
def main():
    st.header("ING banking data")
    df = pd.read_csv(CSV_FILE)
    df = df.rename(
        columns={
            "Date": "date",
            "Description": "description",
            "Credit": "credit",
            "Debit": "debit",
            "Balance": "balance",
        }
    )
    df.date = pd.to_datetime(df.date, format="%d/%m/%Y")
    # Keep only debit (spending) rows and flip the sign so spend is positive.
    df = df[df["debit"].notna()]
    df = df.drop(["credit", "balance"], axis=1)
    df["debit"] = -1 * df["debit"]

    # Quick lookup for finding description substrings to add to the category lists.
    desc = st.text_input("Description to match")
    if desc:
        df_preview = df[df["description"].str.contains(desc, case=False, regex=False)]
        st.write(df_preview)

    df = ignore_in_description(df, IGNORE_DESCRIPTIONS)
    # Move matched debits out of the "debit" column into one column per category.
    for colname, descriptions in CATEGORIES.items():
        add_column_with_matching(df, descriptions, colname)

    is_display_remainder = st.checkbox("Show unclassified rows")
    if is_display_remainder:
        st.write(df[df["debit"].notna()])

    # Monthly totals; "debit" now holds only the unclassified spending.
    agg_df = df.resample(rule="M", on="date").agg(
        {"debit": "sum", **{k: "sum" for k in CATEGORIES.keys()}}
    )
    st.subheader("Family spending")
    cols = st.multiselect(
        "Categories", options=agg_df.columns, default=list(agg_df.columns)
    )
    st.bar_chart(agg_df[cols])

    start_date = st.date_input(
        "Summary start date",
        min_value=df.date.min(),
        max_value=df.date.max(),
        value=max(df.date.min(), df.date.max() - timedelta(days=90)),
    )
    start_dt = datetime.combine(start_date, datetime.min.time())
    summary_series = (
        df[df.date >= start_dt][cols]
        .agg({k: "sum" for k in cols})
        .round()
    )
    summary_df = pd.DataFrame(
        {
            "spend": summary_series.values,
            "percent": (100 * summary_series / summary_series.sum()).round(),
        }
    )
    time_period = (df.date.max() - start_dt).days
    st.subheader(
        f"Total spend over last {time_period} days ({start_dt.date()} to {df.date.max().date()})"
    )
    st.write(summary_df)
    total = summary_series.sum()
    st.write(f"Total: ${total:0.0f}")

    st.subheader(f"Monthly spend over last {time_period} days")
    # Scale the summary window up or down to an average calendar month.
    monthly_adjustment = (365 / time_period) / 12
    summary_df.spend = (summary_df.spend * monthly_adjustment).round()
    st.write(summary_df)
    monthly_avg = monthly_adjustment * total
    st.write(f"Monthly average: ${monthly_avg:0.0f}")
def add_column_with_matching(df: pd.DataFrame, matches: List[str], colname: str):
    """Move debits whose description contains any of `matches` into a new `colname` column."""
    mask = None
    for desc in matches:
        desc_mask = df["description"].str.contains(desc, case=False, regex=False)
        if mask is not None:
            mask |= desc_mask
        else:
            mask = desc_mask

    if mask is not None:
        # Use .loc rather than chained indexing so the writes reliably hit the frame.
        df[colname] = None
        df.loc[mask, colname] = df.loc[mask, "debit"]
        df.loc[mask, "debit"] = None
def ignore_in_description(df: pd.DataFrame, ignores: List[str]) -> pd.DataFrame:
    """Drop rows whose description contains any of the `ignores` substrings."""
    mask = None
    for desc in ignores:
        desc_mask = ~df["description"].str.contains(desc, case=False, regex=False)
        if mask is not None:
            mask &= desc_mask
        else:
            mask = desc_mask

    if mask is not None:
        return df[mask]
    else:
        return df


main()