Created
October 28, 2022 13:43
-
-
Save bennofs/ffa844d2e0556d66227447ad5487b9ca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Process PayPal transaction exports for ledger import | |
Usage: | |
process.py TRANSACTIONS ACTIVITY | |
where TRANSACTIONS is the csv export of PayPal transactions (https://www.paypal.com/reports/statements/custom) | |
and ACTIVITY is the csv export of PayPal activities (https://www.paypal.com/reports/dlog) for the same time range. | |
""" | |
from docopt import docopt | |
from numpy import dtype | |
import pandas as pd | |
import collections | |
TIMEZONE_TO_OFFSET = { | |
'CEST': '+0200', | |
'CET': '+0100', | |
} | |
PAYPAL_OLD_REF_TO_NEW_REF = { | |
'5ZW23UWZAGXG8': '1011989330659', | |
'5ZW23UXHMN6HU': '1012263618813', | |
'5ZW23UY7VBNLU': '1012636739885', | |
'5ZW23UXPSWE2Y': '1012367038479', | |
'5ZW23UX6DUZSY': '1012074987407', | |
'5ZW23UYYKFZPY': '1013050998623', | |
'5ZW23UYNKLBYG': '1012883297255', | |
'5ZW23UYRZJNMA': '1012940941620', | |
'5ZW23UYZJC9JL': '1013067190025', | |
'5ZW23UZ95GH8J': '1013194661480', | |
} | |
def generate_record(activity_by_code, txs): | |
record = { | |
'amount_bank_deduct': 0, | |
'amount_convert_base': 0, | |
'amount_convert_other': 0, | |
'amount_main': 0, | |
'currency_main': 'EUR', | |
'fee': None, | |
'currency_convert_base': '', | |
'currency_convert_other': '', | |
'bank_ref': '', | |
'bank_name': '', | |
'desc': None, | |
'date': None, | |
'time': None, | |
'invoice_code': '', | |
'name': None, | |
'tx_code': None, | |
'kind': 'normal', | |
'bank_currency': None, | |
'email': None, | |
} | |
main_tx = None | |
for tx in txs: | |
desc = tx['Beschreibung'] | |
entgelt = tx['Entgelt'] if 'Entgelt' in tx else tx['Entgelt '] | |
brutto = tx['Brutto'] if 'Brutto' in tx else tx['Brutto '] | |
if desc == "Bankgutschrift auf PayPal-Konto" or desc == 'Allgemeine Abbuchung' or desc == 'Allgemeine Abbuchung – Bankkonto': | |
assert entgelt.strip() == '0,00', "fees not supported for bank transactions" | |
record['amount_bank_deduct'] = brutto | |
record['bank_name'] = tx['Name der Bank'] | |
record['bank_currency'] = tx['Währung'] | |
bank_refs = [x['Bankreferenz'] for x in activity_by_code[tx['Transaktionscode']]] | |
bank_refs = [x for x in bank_refs if x and not pd.isna(x)] | |
if len(bank_refs) != 1: | |
print(pd.DataFrame(activity_by_code[tx['Transaktionscode']])) | |
raise RuntimeError("multiple bank refs or no bank ref") | |
bank_ref = bank_refs[0] | |
record['bank_ref'] = PAYPAL_OLD_REF_TO_NEW_REF.get(bank_ref, bank_ref) | |
# if this is the only TX, treat it as main TX as well | |
if len(txs) != 1: | |
continue | |
record['kind'] = 'bank' | |
if desc == 'Allgemeine Währungsumrechnung': | |
assert entgelt.strip() == '0,00', "fees not supported for currency transactions" | |
base = 'base' if tx['Währung'] == 'EUR' or record['currency_convert_other'] else 'other' | |
record[f'amount_convert_{base}'] = brutto | |
record[f'currency_convert_{base}'] = tx['Währung'] | |
continue | |
if main_tx is not None: | |
print(pd.DataFrame([main_tx, tx])) | |
print("--") | |
print(pd.DataFrame(txs)) | |
raise RuntimeError("tx not handled") | |
main_tx = tx | |
record['desc'] = tx['Beschreibung'] | |
record['name'] = tx['Name'] | |
dt = tx['timestamp'].tz_convert('Europe/Berlin') | |
record['date'] = dt.strftime("%Y-%m-%d") | |
record['time'] = dt.strftime("%H:%M:%S") | |
record['currency_main'] = tx['Währung'] | |
record['amount_main'] = brutto | |
record['invoice_code'] = tx['Rechnungsnummer'] | |
record['tx_code'] = tx['Transaktionscode'] | |
record['fee'] = entgelt | |
record['email'] = tx['Absender (E-Mail-Adresse)'] if 'Absender (E-Mail-Adresse)' in tx else tx['Absender E-Mail-Adresse'] | |
record['timestamp'] = tx['timestamp'] | |
if main_tx is None: | |
print(pd.DataFrame(txs)) | |
raise RuntimeError("no main tx") | |
if bool(record['currency_convert_base']) != bool(record['currency_convert_other']): | |
print(pd.DataFrame(txs)) | |
raise RuntimeError("missing in or out currency convert transaction") | |
return record | |
def main(): | |
assert __doc__ is not None | |
args = docopt(__doc__) | |
txs = pd.read_csv(args['TRANSACTIONS'], dtype=str) | |
activity = pd.read_csv(args['ACTIVITY'], dtype=str) | |
txs_ts = pd.to_datetime(txs['Datum'] + " " + txs['Uhrzeit'] + " " + txs['Zeitzone'], format="%d.%m.%Y %H:%M:%S %Z", utc=True) | |
txs = txs.assign(timestamp=txs_ts) | |
activity_ts = pd.to_datetime(activity['Datum'] + " " + activity['Uhrzeit'] + activity['Zeitzone'].map(TIMEZONE_TO_OFFSET), format="%d.%m.%Y %H:%M:%S%z", utc=True) | |
activity = activity.assign(timestamp=activity_ts) | |
activity_by_code = collections.defaultdict(list) | |
for _, r in activity.iterrows(): | |
code = r['Transaktionscode'] | |
activity_by_code[code].append(r) | |
records = [] | |
for _, rows in txs.groupby('timestamp'): | |
tx_by_root = {} | |
has_parent = set() | |
for _, tx in rows.iterrows(): | |
code = tx['Transaktionscode'] | |
related_code = tx['Zugehöriger Transaktionscode'] | |
if pd.isna(related_code) or not related_code.strip(): | |
tx_by_root.setdefault(code, []) | |
tx_by_root[code].append(tx) | |
continue | |
tx_by_root.setdefault(related_code, []) | |
tx_by_root[related_code].extend([tx] + tx_by_root.get(code, [])) | |
tx_by_root[code] = tx_by_root[related_code] | |
has_parent.add(code) | |
roots = set(tx_by_root) - has_parent | |
records.extend(generate_record(activity_by_code, tx_by_root[root]) for root in roots) | |
df = pd.DataFrame.from_records(records) | |
print(df.to_csv(index=False)) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment