Last active
June 7, 2023 09:54
-
-
Save EpicWink/fee98be0baf6bf0afc1ebec2f7063567 to your computer and use it in GitHub Desktop.
Parse Suncorp bank statement and output transactions as CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Parse Suncorp bank statement text and output transactions as CSV. | |
Download e-statements, copy transactions into text file, pass this | |
file as stdin, send stdout to new CSV file. | |
```shell | |
cat statement.txt | python parse-suncorp-statement.py > transactions.csv | |
``` | |
Set `ADDITIONAL_IGNORED_LINES` to exceptional extra lines to be | |
ignored. | |
Set `ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS` to known extra external | |
internet transfer source names/ | |
""" | |
import io | |
import re | |
import csv | |
import sys | |
import typing as t | |
import datetime | |
import dataclasses | |
CURRENCIES_REGEX = r"((aud)|(usd))" | |
ADDITIONAL_IGNORED_LINES = [] | |
ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS = [] | |
@dataclasses.dataclass(slots=True) | |
class Transaction: | |
"""Transaction details.""" | |
occurred: datetime.date | |
"""Transaction date.""" | |
amount: int | |
"""Transaction amount (cents), negative is withdrawal.""" | |
other_account_name: str | |
"""Name of other account in transaction.""" | |
reference: str | |
"""Transaction reference.""" | |
def _log(message: str) -> None: | |
print(message, file=sys.stderr) | |
def _parse_amount(amount_text: str, allow_negative: bool = False) -> int: | |
if allow_negative and amount_text[-1] == "-": | |
return -_parse_amount(amount_text[:-1]) | |
return round(float(amount_text.replace(",", "")) * 100) | |
def parse_transactions_from_bank_statement( | |
lines: t.Iterable[str], | |
) -> t.Generator[Transaction, None, None]: | |
transaction_pattern = re.compile( | |
r"^(?P<day>\d+) (?P<month>[a-zA-Z]{3}) (?P<year>\d{4}) " | |
r"(?P<account>.+) " | |
r"(?P<amount>-?(\d+,)*\d+\.\d+) " | |
r"(?P<balance>-?(\d+,)*\d+\.\d+-?)" | |
r"( .+)?$" | |
) | |
internet_transfer_pattern = re.compile( | |
r"^((from)|(to)) (?P<account>.+)(?: ref(erence)? no (?P<ref>\S+))?$" | |
) | |
page_pattern = re.compile(r"page \d+ of \d+") | |
footer_pattern = re.compile(r"statement no: .+") | |
foreign_currency_pattern = re.compile(r"(\d+,)*\d+.\d+ " + CURRENCIES_REGEX) | |
effective_date_pattern = re.compile(r"effective date \d\d/\d\d/\d{4}") | |
months = { | |
"Jan": 1, | |
"Feb": 2, | |
"Mar": 3, | |
"Apr": 4, | |
"May": 5, | |
"Jun": 6, | |
"Jul": 7, | |
"Aug": 8, | |
"Sep": 9, | |
"Oct": 10, | |
"Nov": 11, | |
"Dec": 12, | |
} | |
ignored_lines = ( | |
"account transactions", | |
"account transactions continued", | |
"date transaction details withdrawal deposit balance", | |
"details are continued on the back of this page", | |
"aud", | |
"13 11 55", | |
"suncorp.com.au", | |
*ADDITIONAL_IGNORED_LINES, | |
) | |
internet_transfer_accounts = ( | |
"internet transfer credit", | |
"internet transfer debit", | |
"internet external transfer", | |
"osko payment", | |
"rev osko payment", | |
*ADDITIONAL_INTERNET_TRANSFER_ACCOUNTS, | |
) | |
prior_balance = None | |
transaction = None | |
for i, line in enumerate(lines): | |
line = line.strip() | |
if not line: | |
continue | |
line_lowercase = line.lower() | |
if ( | |
line_lowercase in | |
or page_pattern.fullmatch(line_lowercase) | |
or footer_pattern.fullmatch(line_lowercase) | |
or effective_date_pattern.fullmatch(line_lowercase) | |
or foreign_currency_pattern.fullmatch(line_lowercase) | |
): | |
continue | |
elif line_lowercase[:15] == "opening balance": | |
current_balance = _parse_amount(line_lowercase[16:], allow_negative=True) | |
if prior_balance is None: | |
prior_balance = current_balance | |
if prior_balance != current_balance: | |
raise ValueError( | |
f"Opening balance (line {i}) doesn't match closing balance: " | |
f"${prior_balance / 100.0} != ${current_balance / 100.0}" | |
) | |
elif line_lowercase[:23] == "balance carried forward": | |
current_balance = _parse_amount(line_lowercase[24:], allow_negative=True) | |
if prior_balance != current_balance: | |
raise ValueError( | |
f"Carried-forward balance (line {i}) doesn't match prior balance: " | |
f"${prior_balance / 100.0} != ${current_balance / 100.0}" | |
) | |
elif line_lowercase[:23] == "balance brought forward": | |
current_balance = _parse_amount(line_lowercase[24:], allow_negative=True) | |
if prior_balance != current_balance: | |
raise ValueError( | |
f"Brought-forward balance (line {i}) doesn't match prior balance: " | |
f"${prior_balance / 100.0} != ${current_balance / 100.0}" | |
) | |
elif line_lowercase[:15] == "closing balance": | |
current_balance = _parse_amount(line_lowercase[16:], allow_negative=True) | |
if prior_balance != current_balance: | |
raise ValueError( | |
f"Closing balance (line {i}) doesn't match prior balance: " | |
f"${prior_balance / 100.0} != ${current_balance / 100.0}" | |
) | |
elif transaction_match := transaction_pattern.fullmatch(line): | |
if transaction: | |
yield transaction | |
day = int(transaction_match["day"]) | |
month = months[transaction_match["month"]] | |
year = int(transaction_match["year"]) | |
occurred = datetime.date(year=year, month=month, day=day) | |
current_balance = _parse_amount( | |
transaction_match['balance'], allow_negative=True | |
) | |
amount = _parse_amount(transaction_match["amount"]) | |
if current_balance < prior_balance: | |
amount *= -1 | |
transaction = Transaction( | |
occurred, | |
amount, | |
other_account_name=transaction_match["account"], | |
reference="", | |
) | |
prior_balance = current_balance | |
elif transaction: | |
if internet_transfer_match := internet_transfer_pattern.fullmatch( | |
line_lowercase, | |
): | |
if ( | |
transaction.other_account_name.lower() | |
not in internet_transfer_accounts | |
): | |
_log(f"Other account: {transaction.other_account_name}") | |
raise ValueError( | |
f"Unexpected internet transfer reference (line {i}): " | |
f"{line}" | |
) | |
if internet_transfer_match["ref"]: | |
if transaction.reference: | |
transaction.reference += "; " | |
transaction.reference += f"REF NO {internet_transfer_match['ref']}" | |
transaction.other_account_name = internet_transfer_match['account'] | |
else: | |
if transaction.reference: | |
transaction.reference += "; " | |
transaction.reference += line | |
else: | |
_log(f"Current transaction: {transaction}") | |
raise ValueError(f"Unexpected line ({i}): {line}") | |
if transaction: | |
yield transaction | |
def emit_transactions_csv( | |
transactions: t.Iterable[Transaction], | |
stream: t.BinaryIO, | |
) -> None: | |
stream_text = io.TextIOWrapper( | |
stream, | |
newline="", | |
encoding="utf-8", | |
write_through=True, | |
) | |
writer = csv.writer(stream_text) | |
writer.writerow(('date', 'amount ($)', 'other account', 'reference')) | |
for transaction in transactions: | |
occurred_text = transaction.occurred.isoformat() | |
amount_text = str(round(transaction.amount / 100.0, 2)) | |
writer.writerow(( | |
occurred_text, | |
amount_text, | |
transaction.other_account_name, | |
transaction.reference, | |
)) # fmt: skip | |
def main() -> None: | |
transactions = parse_transactions_from_bank_statement(sys.stdin) | |
emit_transactions_csv(transactions, sys.stdout.buffer) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment