Last active
October 29, 2019 17:48
-
-
Save jzuern/48a6c38c1a548b32446a079b99948d30 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# coding=utf-8 | |
import collections | |
import sys | |
import itertools | |
import matplotlib.pyplot as plt | |
import matplotlib as mpl | |
import time | |
import datetime | |
import numpy as np | |
import glob | |
# example | |
from matplotlib.dates import DayLocator, MonthLocator, HourLocator, DateFormatter, drange | |
from numpy import arange | |
# helper functions------------------ | |
def german_value_string_to_float(value_string): | |
print(value_string) | |
# replace all , by . | |
v = value_string.replace(',','.') | |
# remove all but last . | |
v = v.replace('.','', v.count('.')-1) | |
# remove quotation marks | |
v = v.replace('"','') | |
return float(v) | |
def operation_equal(operation1, operation2): | |
return operation1 == operation2 | |
def german_to_datetime(german_date_string): | |
values = german_date_string.replace("\r","") | |
values = values.replace("\n","") | |
values = values.replace("\"","") | |
values = values.split(".") | |
day = int(values[0]) | |
month = int(values[1]) | |
year = int(values[2]) | |
dt = datetime.datetime(year=year,month=month,day=day) | |
return dt | |
def create_plotting_data(operations,start_balance): | |
start_balance_value = german_value_string_to_float(start_balance[1]) | |
start_balance_time = german_to_datetime(start_balance[0]) | |
unsorted_dates = [] | |
unsorted_values = [] | |
for operation in operations: | |
dt = german_to_datetime(operation['Buchungstag']) | |
unsorted_dates.append(dt) | |
if operation['Soll/Haben'] == "H": | |
unsorted_values.append(operation['Umsatz']) | |
else: | |
unsorted_values.append(-1.0*operation['Umsatz']) | |
#sort them | |
dates, value = zip(*sorted(zip(unsorted_dates,unsorted_values))) | |
# start accumulation with start balance | |
current_assets = [start_balance_value] | |
operation_dates = [start_balance_time] | |
#accumulate values | |
for i in range(len(value)): | |
current_assets.append(value[i] + current_assets[-1]) | |
operation_dates.append(dates[i]) | |
return operation_dates,current_assets | |
def find_start_balance(balances): | |
# find earliest balance: | |
most_recent_time = datetime.datetime.now() | |
start_balance = () | |
for balance in balances: | |
balance_time = german_to_datetime(balance[0]) | |
if (balance_time < most_recent_time): | |
start_balance = balance | |
most_recent_time = balance_time | |
return start_balance | |
def load_csv_folder(directory): | |
operations = [] | |
balances = [] | |
# delimiter for "Haben" (credit) | |
delim_H = '"H"' | |
# delimiter for "Soll" (debit) | |
delim_S = '"S"' | |
for csv_filename in glob.glob(directory + '/*.csv'): | |
file = open(csv_filename, 'r', encoding = "ISO-8859-1") | |
# skip first 13 lines | |
for i in range(13): | |
next(file) | |
content = file.read() | |
file.close() | |
# split based on delimiter for debit | |
for block in content.split(delim_S): | |
block = block + '"S"' | |
for subblock in block.split(delim_H): | |
print("subblock = " + subblock) | |
# initiate empty operation and balance | |
operation = {} | |
if (not ('"S"' in subblock)): | |
operation['Soll/Haben'] = "H" | |
else: | |
operation['Soll/Haben'] = "S" | |
values = subblock.split(';') | |
# handle starting balance and end balance | |
if ("Anfangssaldo" in subblock or "Endsaldo" in subblock): | |
date = values[0].replace('\n', '').replace('\r', '') | |
amount = values[-2] | |
balance = (date, amount) | |
balances.append(balance) | |
continue | |
if(len(values) == 13): # only process valid blocks with 13 fields in values | |
print("found valid block") | |
operation['Buchungstag'] = values[0] | |
operation['Valuta'] = values[1] | |
operation['Zahlungsempfaenger'] = values[2] | |
operation['Zahlungspflichtiger'] = values[3] | |
operation['IBAN'] = values[5] | |
operation['BIC'] = values[7] | |
operation['Verwendungszweck'] = values[8].replace('\n', ' ') | |
operation['Waehrung'] = values[10] | |
operation['Umsatz'] = german_value_string_to_float(values[11]) | |
operations.append(operation) | |
continue | |
return operations, balances | |
def plot_data(dates, assets): | |
fig, ax = plt.subplots() | |
ax.plot_date(dates, assets, linestyle='solid', marker='.') | |
ax.set_xlim(dates[0], dates[-1]) | |
ax.set_title('Assets') | |
ax.set_xlabel('Date') | |
ax.set_ylabel('Balance[EUR]') | |
ax.xaxis.set_major_locator(MonthLocator()) | |
ax.xaxis.set_minor_locator(DayLocator()) | |
ax.xaxis.set_major_formatter(DateFormatter('%m/%d/%Y')) | |
ax.xaxis.set_minor_formatter(DateFormatter('%m/%d')) | |
ax.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S') | |
fig.autofmt_xdate() | |
plt.show() | |
if __name__ == "__main__": | |
operations, balances = load_csv_folder('data') | |
start_balance = find_start_balance(balances) | |
dates, assets = create_plotting_data(operations,start_balance) | |
# plotting | |
plot_data(dates, assets) |
We can make this file beautiful and searchable if this error is corrected: Any value after quoted field isn't allowed in line 3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"My awesome bank" | |
"Umsatzanzeige" | |
"BLZ:";"0000000";;"Datum:";"03.01.2018" | |
"Konto:";"0000000";;"Uhrzeit:";"14:16:21" | |
"Abfrage von:";"John Doe";;"Kontoinhaber:";"John Doe" | |
"Zeitraum:";"Alle Umsätze";"von:";;"bis:"; | |
"Betrag in EUR:";;"von:";" ";"bis:";" " | |
"Sortiert nach:";"Buchungstag";"absteigend" | |
"Buchungstag";"Valuta";"Auftraggeber/Zahlungsempfänger";"Empfänger/Zahlungspflichtiger";"Konto-Nr.";"IBAN";"BLZ";"BIC";"Vorgang/Verwendungszweck";"Kundenreferenz";"Währung";"Umsatz";" " | |
"21.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"holidays";;"EUR";"10,00";"S" | |
"22.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"costco";;"EUR";"20,00";"H" | |
"23.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"lunch";;"EUR";"84,00";"S" | |
"23.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"dinner";;"EUR";"14,00";"S" | |
"24.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"dinner";;"EUR";"33,00";"H" | |
"25.09.2017";"21.09.2017";"Max Miller";"J H";;"XX0000";;"XXXXXXXXXX";"present";;"EUR";"50,00";"H" | |
"20.09.2017";;;;;;;;;"Anfangssaldo";"EUR";"100,00";"H" | |
"26.09.2017";;;;;;;;;"Endsaldo";"EUR";"90,00";"H" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment