Created
August 24, 2025 21:16
-
-
Save JaosnHsieh/936271a27d734f868ef9d60d243b43c3 to your computer and use it in GitHub Desktop.
python3 script to generate interactive chart html report for rogers credit card bank statement
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pdfplumber | |
import csv | |
import re | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
# ---- Config ---- | |
PDF_FILE = "1.pdf" | |
CSV_FILE = "bank_statement.csv" | |
OUTPUT_HTML = "report.html" | |
LARGE_THRESHOLD = 300.0 | |
# Keyword-based categorization rules | |
CATEGORY_RULES = { | |
"food": ["TIM HORTONS", "MCDONALD", "STARBUCKS", "BURGER", "RESTAURANT", "SHANGHAI360"], | |
"groceries": ["COSTCO", "SUPERMARKET", "FOODMART", "METRO", "NOFRILLS", "WAL-MART"], | |
"transport": ["PRESTO", "UBER", "LYFT", "PARKING", "IMARK", "GAS", "ESSO"], | |
"shopping": ["AMAZON", "HOME DEPOT", "IKEA", "SHOPPERS"], | |
"utilities": ["BELL", "ROGERS", "VIRGIN", "FIDOMOBILE", "ECONOMICAL INSURANCE"], | |
"health": ["PHARMACY", "HOSPITAL"], | |
"other": [] | |
} | |
def extract_transactions(pdf_path, csv_path): | |
transactions = [] | |
in_transactions = False | |
# Regex to capture transactions | |
pattern = re.compile(r"^([A-Za-z]{3}\d{1,2})\s+([A-Za-z]{3}\d{1,2})\s+(.+?)(-?\d[\d,]*\.\d{2})$") | |
with pdfplumber.open(pdf_path) as pdf: | |
for page in pdf.pages: | |
text = page.extract_text() | |
if not text: | |
continue | |
lines = text.splitlines() | |
for line in lines: | |
# Detect start of transactions | |
if "TransactionDetails" in line: | |
in_transactions = True | |
continue | |
# Detect possible end of section | |
if "InterestRateChart" in line or "AccountNumber" in line: | |
in_transactions = False | |
if in_transactions: | |
line = line.strip() | |
match = pattern.match(line) | |
if match: | |
trans_date = match.group(1) | |
post_date = match.group(2) | |
desc = match.group(3).strip() | |
amount = match.group(4).replace(",", "") | |
transactions.append([trans_date, post_date, desc, amount]) | |
# Write to CSV | |
with open(csv_path, "w", newline="") as f: | |
writer = csv.writer(f) | |
writer.writerow(["Trans Date", "Post Date", "Description", "Amount ($)"]) | |
writer.writerows(transactions) | |
def categorize(description: str) -> str: | |
desc_upper = description.upper() | |
for category, keywords in CATEGORY_RULES.items(): | |
for kw in keywords: | |
if kw in desc_upper: | |
return category | |
return "other" | |
def generate_report(csv_path, output_html): | |
df = pd.read_csv(csv_path) | |
# Clean data | |
df["Amount ($)"] = df["Amount ($)"].astype(str).str.replace(",", "").astype(float) | |
# Categorization | |
df["Category"] = df["Description"].apply(categorize) | |
# Separate out payments (negative amounts = credit card payments) | |
payments_df = df[df["Amount ($)"] < 0].copy() | |
spend_df = df[df["Amount ($)"] > 0].copy() | |
# Large transactions (only consider spending) | |
spend_df["Is Large"] = spend_df["Amount ($)"].abs() > LARGE_THRESHOLD | |
# Totals per category (only spending) | |
category_totals = spend_df.groupby("Category")["Amount ($)"].sum().reset_index() | |
# Total money spent | |
total_spent = spend_df["Amount ($)"].sum() | |
# ---- Visualization with Plotly (interactive) ---- | |
figures = [] | |
# Pie chart (absolute values) | |
category_totals["AbsAmount"] = category_totals["Amount ($)"].abs() | |
pie = px.pie( | |
category_totals, | |
names="Category", | |
values="AbsAmount", | |
title="Spending by Category" | |
) | |
pie.update_traces(textinfo="percent+label", textfont=dict(size=14), sort=False) | |
pie.update_layout( | |
title_font=dict(size=20), | |
legend=dict(font=dict(size=12)), | |
width=500, | |
height=400 | |
) | |
figures.append(pie) | |
# Bar chart | |
bar = px.bar( | |
category_totals, | |
x="Category", | |
y="Amount ($)", | |
title="Spending Amount per Category", | |
text="Amount ($)" | |
) | |
bar.update_traces(texttemplate="%{text:.2s}", textposition="outside") | |
bar.update_layout( | |
title_font=dict(size=20), | |
xaxis=dict(tickfont=dict(size=10)), | |
yaxis=dict(tickfont=dict(size=10)), | |
width=600, | |
height=400 | |
) | |
figures.append(bar) | |
# Timeline (only spending) | |
spend_df["Post Date"] = pd.to_datetime(spend_df["Post Date"] + ", 2025", errors="coerce", format="%b%d, %Y") | |
timeline = px.line( | |
spend_df, | |
x="Post Date", | |
y="Amount ($)", | |
color="Category", | |
title="Timeline of Spending", | |
markers=True | |
) | |
timeline.update_layout( | |
title_font=dict(size=20), | |
xaxis=dict(tickfont=dict(size=10)), | |
yaxis=dict(tickfont=dict(size=10)), | |
width=700, | |
height=400 | |
) | |
figures.append(timeline) | |
# Large transactions table (only spending) | |
large_txn = spend_df[spend_df["Is Large"]] | |
if not large_txn.empty: | |
table = go.Figure(data=[go.Table( | |
header=dict(values=list(large_txn.columns), fill_color="paleturquoise", align="left", font=dict(size=14)), | |
cells=dict(values=[large_txn[c] for c in large_txn.columns], fill_color="lavender", align="left", font=dict(size=12)) | |
)]) | |
table.update_layout( | |
title="Large Transactions (>{})".format(LARGE_THRESHOLD), | |
title_font=dict(size=20), | |
width=800, | |
height=300 | |
) | |
figures.append(table) | |
# Payments table (separate section for negative transactions) | |
if not payments_df.empty: | |
pay_table = go.Figure(data=[go.Table( | |
header=dict(values=list(payments_df.columns), fill_color="lightgrey", align="left", font=dict(size=14)), | |
cells=dict(values=[payments_df[c] for c in payments_df.columns], fill_color="white", align="left", font=dict(size=12)) | |
)]) | |
pay_table.update_layout( | |
title="Payments (Card Balance)", | |
title_font=dict(size=20), | |
width=800, | |
height=300 | |
) | |
figures.append(pay_table) | |
# Total money spent summary FIRST with simple HTML | |
first_date = spend_df["Post Date"].min() | |
last_date = spend_df["Post Date"].max() | |
date_range_text = f" ({first_date.date()} → {last_date.date()})" if pd.notna(first_date) and pd.notna(last_date) else "" | |
summary_text = f"<h1>Total Money Spent: ${total_spent:,.2f}{date_range_text}</h1>" | |
# Ensure summary is written at the very top of HTML | |
summary_html = summary_text + "<br><br>" | |
# Write interactive HTML with JS interactivity | |
with open(output_html, "w") as f: | |
# Write summary first | |
f.write(summary_html) | |
# Add pie chart | |
f.write(figures[0].to_html(full_html=False, include_plotlyjs="cdn", div_id="piechart")) | |
f.write('<div id="category-table"></div>') | |
f.write("<br><br>\n") | |
# Add remaining figures (skip the pie we already wrote, summary already added separately) | |
for fig in figures[1:]: | |
f.write(fig.to_html(full_html=False, include_plotlyjs=False)) | |
f.write("<br><br>\n") | |
# Embed transaction data for interactivity | |
f.write("<script>\n") | |
f.write("const transactionData = {};\n") | |
for category in spend_df["Category"].unique(): | |
cat_df = spend_df[spend_df["Category"] == category].sort_values("Post Date") | |
f.write(f"transactionData['{category}'] = {cat_df.to_json(orient='records', date_format='iso')};\n") | |
f.write(""" | |
// Function to render table into #category-table | |
function renderTable(category) { | |
const container = document.getElementById('category-table'); | |
if (!transactionData[category] || transactionData[category].length === 0) { | |
container.innerHTML = "<p>No transactions found.</p>"; | |
return; | |
} | |
let html = "<h3>Transactions for " + category + "</h3>"; | |
html += "<table border='1' cellspacing='0' cellpadding='4'><tr>"; | |
for (const key of Object.keys(transactionData[category][0])) { | |
html += "<th>" + key + "</th>"; | |
} | |
html += "</tr>"; | |
for (const row of transactionData[category]) { | |
html += "<tr>"; | |
for (const key of Object.keys(row)) { | |
html += "<td>" + row[key] + "</td>"; | |
} | |
html += "</tr>"; | |
} | |
html += "</table>"; | |
container.innerHTML = html; | |
} | |
// Attach click event to pie chart | |
var pie = document.getElementById('piechart'); | |
pie.on('plotly_click', function(data){ | |
var category = data.points[0].label; | |
renderTable(category); | |
}); | |
""") | |
f.write("</script>") | |
def main(): | |
extract_transactions(PDF_FILE, CSV_FILE) | |
print("Extraction complete! Saved to bank_statement.csv") | |
generate_report(CSV_FILE, OUTPUT_HTML) | |
print(f"Analysis complete with Plotly interactive charts! Report saved to {OUTPUT_HTML}") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment