-
-
Save hnykda/b8912ec1db311d53426520a95d35d112 to your computer and use it in GitHub Desktop.
import argparse | |
import datetime | |
import pandas as pd | |
def parse_args(): | |
parser = argparse.ArgumentParser() | |
parser.add_argument('input', help='Input revolut export file') | |
parser.add_argument('--output', default="rev2wall-{CURRENCY}-{TIMESTAMP}.csv", help='Input Revolut file') | |
return parser.parse_args() | |
def load_df(input_file): | |
_df = (pd | |
.read_csv(input_file, sep=';', parse_dates=['Completed Date '], thousands=',', na_values=[' ']) | |
.rename(columns=lambda x: x.strip()) | |
) | |
return _df | |
def get_currency(df): | |
return df.filter(like='Paid Out').columns[0][-4:-1] | |
def wrangle_format(_df, cur): | |
df = (_df | |
.assign( | |
currency=cur, | |
paid_out_currency=lambda x: x['Paid Out ({})'.format(cur)].astype(float), | |
paid_in_currency=lambda x: x['Paid In ({})'.format(cur)].astype(float), | |
amount=lambda x: x.paid_in_currency.where(x.paid_in_currency.notna(), -x.paid_out_currency), | |
date=lambda x: pd.to_datetime(x['Completed Date']).dt.strftime('%Y/%m/%d'), | |
note=lambda x: (x['Notes'] + ' ' + x['Category']).str.strip(), | |
type=lambda x: (x['paid_in_currency'].notna()).map({True: "Income", False: "Expense"}), | |
) | |
.rename(columns=lambda x: x.lower()) | |
.filter(['date', 'currency', 'amount', 'note', 'type']) | |
) | |
return df | |
def main(input_file): | |
_df = load_df(input_file) | |
cur = get_currency(_df) | |
df = wrangle_format(_df, cur) | |
return df, cur | |
if __name__ == "__main__": | |
args = parse_args() | |
df, cur = main(args.input) | |
try: | |
output = args.output.format(CURRENCY=cur, TIMESTAMP=pd.datetime.now().strftime("%Y%m%dT%H%M%S")) | |
except KeyError: | |
output = args.output | |
df.to_csv(output, index=False, sep=";") |
@Galeaettu got a SyntaxError
which is most likely by me using f-strings which are in Python 3.7 and not in previous versions. The .notna()
addition won't help here, but changing all f-strings to .format
would. So e.g. in the case of
paid_out_currency=lambda x: x[f'Paid Out ({cur})'].astype(float),
paid_in_currency=lambda x: x[f'Paid In ({cur})'].astype(float),
you need
paid_out_currency=lambda x: x['Paid Out ({})'.format(cur)].astype(float),
paid_in_currency=lambda x: x['Paid In ({})'.format(cur)].astype(float),
I changed that in the original version
Getting this on my debian machine, any idea how to solve this?
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/pandas/core/indexes/base.py", line 2525, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 117, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 139, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1265, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1273, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'paid_in_currency'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "revolut2wallet.py", line 46, in <module>
df, cur = main(args.input)
File "revolut2wallet.py", line 41, in main
df = wrangle_format(_df, cur)
File "revolut2wallet.py", line 34, in wrangle_format
.filter(['date', 'currency', 'amount', 'note', 'type'])
File "/usr/local/lib/python3.4/dist-packages/pandas/core/frame.py", line 2685, in assign
results[k] = com._apply_if_callable(v, data)
File "/usr/local/lib/python3.4/dist-packages/pandas/core/common.py", line 477, in _apply_if_callable
return maybe_callable(obj, **kwargs)
File "revolut2wallet.py", line 31, in <lambda>
type=lambda x: (x['paid_in_currency'].notna()).map({True: "Income", False: "Expense"}),
File "/usr/local/lib/python3.4/dist-packages/pandas/core/frame.py", line 2139, in __getitem__
return self._getitem_column(key)
File "/usr/local/lib/python3.4/dist-packages/pandas/core/frame.py", line 2146, in _getitem_column
return self._get_item_cache(key)
File "/usr/local/lib/python3.4/dist-packages/pandas/core/generic.py", line 1842, in _get_item_cache
values = self._data.get(item)
File "/usr/local/lib/python3.4/dist-packages/pandas/core/internals.py", line 3843, in get
loc = self.items.get_loc(item)
File "/usr/local/lib/python3.4/dist-packages/pandas/core/indexes/base.py", line 2527, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 117, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 139, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1265, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1273, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'paid_in_currency'
@troych Because you use Python 3.4, you need at least 3.6 where an order of keywords arguments is guaranteed (technically in 3.7, but 3.6+ will work as well). Python 3.4 is retired, you should get rid of it.
That worked indeed, finally had some time to update my python install.
Second problem now, for my currency (CHF) revolut uses "," to delimit. So I've got payments of 22,70 for example. This does not seem to work with your snippet, 22,70 gets converted to 2270. If I change the delimiter to "." things work correctly.
Yeah, that's annoying. I guess that could be solved by changing thousands
here.
Yeah, that's annoying. I guess that could be solved by changing
thousands
here.
Thanks, I've ended up doing it like this:
.read_csv(input_file, sep=';', parse_dates=['Completed Date '], decimal=',', na_values=[' '])
Works as intended now, thank you very much. :)
@Galeaettu, just change this 2 lines: