Created
February 11, 2024 15:58
-
-
Save sachinsmc/1ac6290778828612e4f112ddbff1a6f8 to your computer and use it in GitHub Desktop.
parse endb statement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import etree, objectify | |
import sys | |
import argparse | |
import re | |
import os.path | |
import datetime | |
def f_colorise(string, color): | |
colors = { | |
'red': '\033[91m', | |
'green': '\033[92m', | |
'blue': '\033[94m', | |
'cyan': '\033[96m', | |
'white': '\033[97m', | |
'yellow': '\033[93m', | |
'magenta': '\033[95m', | |
'grey': '\033[90m', | |
'black': '\033[90m' | |
} | |
if color not in colors: | |
return string | |
return colors[color] + string + '\033[0m' | |
def f_remove_namespace(source_xml): | |
metadata = source_xml | |
parser = etree.XMLParser(remove_blank_text=True) | |
tree = etree.parse(metadata, parser) | |
root = tree.getroot() | |
for elem in root.getiterator(): | |
if not hasattr(elem.tag, 'find'): | |
continue | |
i = elem.tag.find('}') | |
if i >= 0: | |
elem.tag = elem.tag[i+1:] | |
objectify.deannotate(root, cleanup_namespaces=True) | |
return tree | |
def f_xml_parse(source_xml, statement_type): | |
tree = source_xml | |
root = tree.getroot() | |
transaction_list = [] | |
for row in root.iter('Row'): | |
try: | |
if len(row.getchildren()) == 5: | |
''' | |
Debit: | |
[0] Date | |
[1] Description | |
[2] Debit | |
[3] Credit | |
[4] Account Balance | |
Credit: | |
[0] Transaction date | |
[1] Posting date | |
[2] Description | |
[3] Card Type | |
[4] Amount | |
transaction_list format: | |
[0] Date | |
[1] Description | |
[2] Amount | |
''' | |
transaction = [] | |
for cell in row.iter('Cell'): | |
for data in cell.iter('Data'): | |
transaction.append(data.text) | |
try: | |
if re.search(r'^[0-9]{2}', transaction[0]) is not None: | |
desc = None | |
amount = None | |
date = None | |
memo = "" | |
# print(transaction) | |
# Set correct properties | |
if statement_type == 'credit': | |
amount = str(transaction[4]).replace(',', '') | |
desc = str(transaction[2]).replace(',', '') | |
date = str(transaction[0]).replace(' ', '/') | |
else: | |
# Remove unneeded commas which cause csv problems | |
if transaction[2] is not None: | |
amount = str(transaction[2]).replace(',', '') | |
else: | |
amount = str(transaction[3]).replace(',', '') | |
# Date formatting | |
date = re.search(r'[0-9]{2}\-[0-6]{2}\-[0-9]{4}', transaction[1]) | |
if date is None: | |
date = datetime.datetime.strptime(str(transaction[0]).replace(' ', '/'), '%d/%b/%Y').strftime('%d/%b/%Y') | |
else: | |
date = datetime.datetime.strptime(str(date.group()).replace('-', '/'), '%d/%m/%Y').strftime('%d/%b/%Y') | |
# Cleanup awful description | |
desc = str(transaction[1]).replace(',', '') | |
desc = re.sub(r'POS-PURCHASE CARD NO\.[0-9]+\*\*\*\*\*\*[0-9]+\s*', '', desc) | |
desc = re.sub(r'^\d{6}\s*', '', desc) | |
desc = re.sub(r'[0-9]{2}\-[0-9]{2}\-[0-9]{4}\s*', '', desc) | |
desc = re.sub(r'[0-9]*\.[0-9]*AED\s*', '', desc) | |
desc = re.sub(r'\s+DUBAI:AE', '', desc) | |
transaction_list.append({ | |
"date": date, | |
"description": desc, | |
"amount": amount, | |
"memo": memo | |
}) | |
else: | |
continue | |
except IndexError: | |
continue | |
except KeyError: | |
pass | |
# List may be out of order due to pulling the transaction date from description in debit. | |
transaction_list = sorted( | |
transaction_list, | |
key=lambda x: datetime.datetime.strptime(x['date'], '%d/%b/%Y'), reverse=True | |
) | |
return transaction_list | |
def f_write_csv(transactions, csv_file): | |
f = open(csv_file, "w") | |
f.write("Date,Payee,Memo,Amount\n") | |
for t in transactions: | |
f.write('{date}, {payee}, {memo}, {amount}\n'.format( | |
date=t['date'], | |
payee=t['description'], | |
memo=t['memo'], | |
amount=t['amount'] | |
)) | |
f.close() | |
print(len(transactions), " transactions imported") | |
def f_display_menu(menu_prompt, menu_list): | |
""" | |
Pass a title for the menu, the list of options to display and an additional final option (All, Quit etc.) | |
Format for the list is: | |
List: Menu_List | |
- Dict: item1: property1, item1: property2, item1: property3 | |
- Dict: item2: property1, item2: property2, item2: property3 | |
""" | |
loop = True | |
while loop is True: | |
count = 0 | |
for item in menu_list: | |
count += 1 | |
print( | |
# Menu layout | |
"{list_number:>4} {date} {desc} {amount}".format( | |
# Set variables for the menu | |
list_number="[" + str(menu_list.index(item) + 1) + "]", | |
date=f_colorise(str(item["date"]), "green"), | |
desc=f_colorise(str(item["description"]), "blue"), | |
amount=f_colorise(str(item["amount"]), "green") if re.search(r'^-', item["amount"]) is None else f_colorise(str(item["amount"]), "red") | |
) | |
) | |
print("{list_number:>4} All".format(list_number="[" + str(count+1) + "]")) | |
print("{list_number:>4} Quit".format(list_number="[" + str(count+2) + "]")) | |
choice = input(menu_prompt) | |
try: | |
if int(choice) <= int(count): # if user picks a number then return that entry | |
return(menu_list[int(choice)]) | |
elif int(choice) == int(count+1): # if user picks all then return full array | |
return(menu_list) | |
elif int(choice) == int(count+2): # if user picks quit then quit | |
print("Exiting...") | |
loop = False | |
else: | |
input("\n" + f_colorise(str(choice), "red") + " is an unknown option. Press enter to retry: ") | |
except Exception as e: | |
print("Exception: ", e) | |
input("\n" + f_colorise(str(choice), "red") + " is invalid, the choice needs to be numeric. Press enter to retry: ") | |
exit(0) | |
def main(): | |
statement_type = None | |
csv_file = None | |
parser = argparse.ArgumentParser() | |
parser.add_argument("-i", | |
required=True, | |
help="Specify source file", | |
action="store", | |
dest="xml_file", | |
default=None | |
) | |
parser.add_argument("-o", | |
required=False, | |
help="Specify output csv file", | |
action="store", | |
dest="csv_file", | |
default=None | |
) | |
ap_parsed = parser.parse_args() | |
with open(ap_parsed.xml_file, "r") as f: | |
for line in f: | |
if re.search("CURRENT ACCOUNT", line) is not None: | |
statement_type = "debit" | |
elif re.search("Credit Card Statement", line) is not None: | |
statement_type = "credit" | |
if statement_type is None: | |
print("Unknown statement type") | |
sys.exit(0) | |
if ap_parsed.csv_file is None: | |
csv_file = 'ynab_import.csv' | |
else: | |
csv_file = ap_parsed.csv_file | |
if os.path.isfile(csv_file): | |
while True: | |
reply = str(input('CSV file exists. Overwrite? (y/n): ')).lower().strip() | |
if reply[0] == 'y': | |
break | |
if reply[0] == 'n': | |
sys.exit(0) | |
cleaned_xml = f_remove_namespace(ap_parsed.xml_file) | |
processed_transactions = f_xml_parse(cleaned_xml, statement_type) | |
recent_transation = f_display_menu("Select oldest uncleared transaction: ", processed_transactions) | |
delete_trans = False | |
filtered_transactions = [] | |
for t in processed_transactions: | |
if t == recent_transation or delete_trans is True: | |
delete_trans = True | |
else: | |
filtered_transactions.append(t) | |
f_write_csv(filtered_transactions, csv_file) | |
return | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment