Last active
February 28, 2023 21:16
-
-
Save alfredopalhares/3458177471df0e4991d02b2fea87f41b to your computer and use it in GitHub Desktop.
Beancount Importer Module for the Caixa Geral Depositos Bank
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" I need Core Number D for decimal values """ | |
from beancount.core.number import Decimal | |
from beancount.ingest import importer | |
from beancount.core import account | |
from beancount.core import amount | |
from beancount.core import flags | |
from beancount.core import data | |
from beancount.core.position import Cost | |
import csv | |
import logging | |
import os | |
import re | |
import sys | |
import warnings | |
from datetime import datetime, timedelta | |
## Caixa Geral de Depositos Importer | |
# This importer takes the following parameters: | |
# - account_desc = The account number that is explained on the header of the CSV file so | |
# "0788001977900" so this will properly match the account | |
# - target_asset = The target asset to import these transactions too | |
class CGDImporter(importer.ImporterProtocol): | |
def __init__(self, account_desc, account_target, doc_encoding = "latin1"): | |
self.account_desc = account_desc | |
self.account_target = account_target | |
self.doc_encoding = doc_encoding | |
def name(self): | |
return "Importer Caixa Geral de Depositos CSV" | |
def identify(self, file): | |
proper_file = False | |
if re.match("comprovativo.*.csv", os.path.basename(file.name)): | |
with open(file.name, encoding=self.doc_encoding) as csv_file: | |
for index, line in enumerate(csv_file): | |
if line.__contains__(self.account_desc): | |
print("File {f} matches account {a}".format(f = os.path.basename(file.name), a = self.account_desc)) | |
proper_file = True | |
if line.__contains__("Data de início") or line.__contains__("Data de fim"): | |
print(line.strip("\n")) # The CSV Sometimes have newlines | |
#TODO: Actually check the fieldnames that match the file | |
return proper_file | |
""" | |
Extracts the information for the Given CSV File | |
Opens the file and imports the transactions | |
Adds the original descrito a meta field called source_desc | |
Adds a Balance operation directly exctracted from the CSV Transaction: | |
Uses a transaction instead of the last file, since the last entry cointans the balance | |
at the date the document generated, which might not the the interval of data requested | |
Currently there is not data deduplication | |
""" | |
def extract(self, file): | |
balance_added = False | |
with open(file.name, encoding="latin1") as csv_file: | |
# Skiping the first 7 seven lines just easeier than to parse for now | |
for _ in range(7): | |
next(csv_file) | |
# Headers to Parse | |
csv_fieldnames =["Data mov.", "Data Valor", "Descrição", "Débito", "Crédito", "Saldo contabilístico", "Saldo disponível" "Categoria"] | |
entries = [] # Final entries array | |
for index, row in enumerate(csv.DictReader(csv_file, delimiter=";", fieldnames=csv_fieldnames)): | |
logging.debug(row) | |
# Getting the Date and Description | |
# Last line has the balance | |
if (row["Data Valor"].isspace()): | |
logging.debug("No more data found") | |
break | |
trans_date = datetime.strptime(row["Data mov."], "%d-%m-%Y") | |
trans_desc = row["Descrição"].strip() | |
meta = data.new_metadata(os.path.basename(file.name), index) | |
txn_debit = self.__convertToDecimal(row["Débito"]) | |
txn_credit = self.__convertToDecimal(row["Crédito"]) | |
if not balance_added: | |
balance = self.__convertToDecimal(row["Saldo contabilístico"]) | |
print(balance) | |
entries.append(data.Balance( | |
meta = meta, | |
date = trans_date.date() + timedelta(days=1), # Beancount takes balance at the beginning of the day | |
account = self.account_target, | |
amount = amount.Amount(balance, "EUR"), | |
tolerance = None, | |
diff_amount = None, | |
)) | |
balance_added = True | |
if txn_debit > txn_credit: | |
is_expense = True | |
else: | |
is_expense = False | |
txn_details = self.__decodeDesc(desc = trans_desc, is_expense = is_expense) | |
txn = data.Transaction( | |
meta = meta, | |
date = trans_date.date(), | |
payee = txn_details["payee"], | |
flag = "txn", | |
tags = set(), | |
links = set(), | |
narration = txn_details["narration"], | |
postings = [] | |
) | |
# TODO: Improve this logic | |
if is_expense: | |
txn.postings.append( | |
data.Posting( | |
account = self.account_target, | |
units = amount.Amount(-1*txn_debit, "EUR"), | |
cost = None, | |
price = None, | |
flag = None, | |
meta = dict({ "source_desc": trans_desc }) | |
) | |
) | |
txn.postings.append( | |
data.Posting( | |
account = txn_details["category"], | |
units = amount.Amount(txn_debit, "EUR"), | |
cost = None, | |
price = None, | |
flag = flags.FLAG_WARNING, | |
meta = None | |
) | |
) | |
else: | |
txn.postings.append( | |
data.Posting( | |
account = txn_details["category"], | |
units = amount.Amount(-1 * txn_credit, "EUR"), | |
cost = None, | |
price = None, | |
flag = flags.FLAG_WARNING, | |
meta = None | |
) | |
) | |
txn.postings.append( | |
data.Posting( | |
account = self.account_target, | |
units = amount.Amount( txn_credit, "EUR"), | |
cost = None, | |
price = None, | |
flag = None, | |
meta = dict({ "source_desc": trans_desc }) # TODO: Add final file name | |
) | |
) | |
entries.append(txn) | |
return entries | |
""" | |
Returns a string with the relative path where the import file should be | |
Right now it just returns the target account | |
""" | |
def file_account(self, file): | |
return self.account_target | |
""" | |
Returns a data to be used in file name | |
This is the date where the file was generated, not the date | |
of the extract | |
""" | |
def file_date(self, file): | |
with open(file.name, encoding=self.doc_encoding) as csv_file: | |
for line in csv_file: | |
if line.__contains__("Consultar saldos e movimentos à ordem"): | |
header = line.strip("\n").split("-") | |
date = datetime(year = int(header[3]), | |
month = int(header[2]), | |
day = int(header[1])) | |
print("Date extracted: {d}".format( d = date )) | |
return date | |
return None | |
""" | |
Retuns the final file descriptions | |
Currently reads the date of beginning and the end and adds them | |
""" | |
def file_name(self, file): | |
_, file_extension = os.path.splitext(os.path.basename(file.name)) | |
with open(file.name, encoding=self.doc_encoding) as csv_file: | |
for line in csv_file: | |
if line.__contains__("Data de início"): | |
from_date_string = line.split(";")[1].strip() | |
from_date = datetime.strptime(from_date_string, "%d-%m-%Y") | |
if line.__contains__("Data de fim"): | |
end_date_string = line.split(";")[1].strip() | |
end_date = datetime.strptime(end_date_string, "%d-%m-%Y") | |
return "{acc}-from-{fd}-to-{ed}{ext}".format( | |
acc = self.account_target.replace(":",""), | |
fd = from_date.strftime("%Y-%m-%d"), | |
ed = end_date.strftime("%Y-%m-%d"), | |
ext = file_extension) | |
""" | |
Numbers on the CSV are 12.456,78 so we need to replace the dot and comas | |
Uses the built in number present in beancount/core/number.py | |
""" | |
def __convertToDecimal(self, value): | |
if value is None or value == "" or value.isspace(): | |
return Decimal() | |
value_no_dot = value.replace(".","") | |
value_no_comma = value_no_dot.replace(",",".") | |
return Decimal(value_no_comma) | |
## Tries to decode the description of some known transactions, such as fees | |
# and other common payments, when It does not it Uses | |
# Expenses:Fixme and Income: | |
def __decodeDesc(self, desc, is_expense = True): | |
if is_expense: | |
un_category = "Expenses:Fixme" | |
else: | |
un_category = "Income:Fixme" | |
if desc.__contains__("IMPOSTO SELO S COMISS"): | |
return { | |
"payee" : "CGD", | |
"narration" : "Comissao Imposto Selo", | |
"category" : "Expenses:Banking:Fees" | |
} | |
elif desc.__contains__("COMISSAO COMPRAS FORA"): | |
return { | |
"payee" : "CGD", | |
"narration" : "Comissao Compras Fora", | |
"category" : "Expenses:Banking:Fees" | |
} | |
elif desc.__contains__("COMISSAO CONTA CAIXA"): | |
return { | |
"payee" : "CGD", | |
"narration" : "Comissao Conta Caixa", | |
"category" : "Expenses:Banking:Fees" | |
} | |
elif desc.__contains__("COMPRA SUPERMERCADO F") or desc.__contains__("COMPRA SUPERMERCADO D"): | |
return { | |
"payee" : "Supermecado Freixo", | |
"narration" : "Compras Semanais", | |
"category" : "Expenses:Food:Groceries" | |
} | |
elif desc.__contains__("COMPRA CONTINENTE PT"): | |
return { | |
"payee" : "Continente Ponte de Lima", | |
"narration" : "Compras Semanais", | |
"category" : "Expenses:Food:Groceries" | |
} | |
elif desc.__contains__("DespesasAnuaisPoupanc"): | |
return { | |
"payee" : "CGD", | |
"narration" : "Poupanca para Despesas Anuais", | |
"category" : "Assets:CGD:ContaPoupanca" | |
} | |
elif desc.__contains__("Renda Cabacos Mensal"): | |
return { | |
"payee" : "Cabacos", | |
"narration" : "Renda Mensal", | |
"category" : "Expenses:House:Rent" | |
} | |
elif desc.__contains__("EDP COMERCIAL COMERCI"): | |
return { | |
"payee" : "EDP", | |
"narration" : "Electricidade", | |
"category" : "Expenses:House:Utilities" | |
} | |
elif desc.__contains__("MEO SA"): | |
return { | |
"payee" : "MEO", | |
"narration" : "Mensalidade Internet", | |
"category" : "Expenses:House:Utilities" | |
} | |
elif desc.__contains__("eToro Funding"): | |
return { | |
"payee" : "eToro", | |
"narration" : "Funding eToro Account", | |
"category" : "Assets:Brokers:Etoro:EToroMoney" | |
} | |
elif desc.__contains__("COMPRA ALTADONNA"): | |
return { | |
"payee" : "Altadonna", | |
"narration" : "Pizza Familiar", | |
"category" : "Expenses:Food:EatingOut" | |
} | |
elif desc.__contains__("COMPRA GOOGLE YOUTUBE"): | |
return { | |
"payee" : "Youtube", | |
"narration" : "Mensalidade Premium", | |
"category" : "Expenses:Subscriptions:Streaming" | |
} | |
elif desc.__contains__("COMPRA DISNEY PLUS"): | |
return { | |
"payee" : "Disney Plus", | |
"narration" : "Streaming Disney Plus", | |
"category" : "Expenses:Subscriptions:Streaming" | |
} | |
elif desc.__contains__("COMPRA NETFLIX COM"): | |
return { | |
"payee" : "Netflix", | |
"narration" : "Mensalidade Netflix", | |
"category" : "Expenses:Subscriptions:Streaming" | |
} | |
elif desc.__contains__("COMPRA DIGITALOCEAN C"): | |
return { | |
"payee" : "Digital Ocean", | |
"narration" : "Mensalidade Servidores", | |
"category" : "Expenses:Subscriptions:Servers" | |
} | |
elif desc.__contains__("COMPRA GOOGLE GSUITE"): | |
return { | |
"payee" : "Google", | |
"narration" : "Mensalidade Google Suite", | |
"category" : "Expenses:Subscriptions:Servers" | |
} | |
elif desc.__contains__("COMPRA CARLA LISETE T "): | |
return { | |
"payee" : "Carla Esticista", | |
"narration" : "Depilacao", | |
"category" : "Expenses:SelfCare" | |
} | |
elif desc.__contains__("PPRO Payment Services"): | |
return { | |
"payee" : "Barkyn", | |
"narration" : "Racao Lara e Steve", | |
"category" : "Expenses:Pets:Food" | |
} | |
else: | |
return { | |
"payee" : "", | |
"narration" : desc, | |
"category" : un_category | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# You need this to load the importers on the current | |
# directory, you should learn more about the path | |
import sys | |
from os import path | |
sys.path.insert(0, path.join(path.dirname(__file__))) | |
from importers import CGD | |
CONFIG = [ | |
CGD.Importer(account_desc="<acc_number>", | |
account_target="Assets:CGD:ContaCorrente") | |
] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import date, datetime, timedelta | |
from decimal import Decimal | |
from textwrap import dedent | |
import pytest | |
from beancount.core.data import Balance, Transaction | |
from importers.CGD import CGDImporter | |
account_id = "1234567890123" # Account ID to be identified | |
account_target = "Assets:CGD:ContaCorrente" | |
tmp_file_encoding = "utf-8" # Pytest temporary files are utf-8 | |
@pytest.fixture | |
def tmp_file(tmp_path): | |
return tmp_path / "comprovativo.csv" | |
""" | |
Tests the module identify function by adding a file wih the name and just | |
having a random text on the file | |
""" | |
def test_identify_not_correct(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text("Not a valid Bank file sure!") | |
with tmp_file.open() as file: | |
assert not importer.identify(file) | |
""" | |
Tests the identification of file | |
The file is a correct file | |
Also tests if but its for another account | |
""" | |
def test_identify_empty_file(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
dumb_importer = CGDImporter(account_desc = "666666666" , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
assert importer.identify(file) | |
assert not dumb_importer.identify(file) | |
""" | |
Tests the extraction of an empty file | |
""" | |
def test_extract_empty_file(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
entries = importer.extract(file) | |
assert len(entries) == 0 | |
""" | |
Tests the file identification correctly | |
checks that the given account number matches | |
""" | |
def test_extract_multiple_transactions(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
12-12-2022;10-12-2022;IMPOSTO SELO S COMISS ;0,01;;2.555,06;2.555,06;LEVANTAMENTOS ; | |
12-12-2022;10-12-2022;COMPRA GOOGLE YOUTUBE ;8,49;;2.555,07;2.555,07;COMPRAS ; | |
03-12-2022;30-11-2022;IMPOSTO SELO S COMISS ;0,01;;1.998,85;1.998,85;LEVANTAMENTOS ; | |
03-12-2022;30-11-2022;COMISSAO COMPRAS FORA ;0,29;;1.998,86;1.998,86;LEVANTAMENTOS ; | |
03-12-2022;03-12-2022;COMISSAO CONTA CAIXA ;4,16;;2.008,86;1.999,15;Diversos ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
entries = importer.extract(file) | |
# There should be 6 entries, 5 transactions and one balance operation | |
assert len(entries) == 6 | |
# The first entry is a balance enrty | |
# Assuming the value conversiton | |
assert entries[0].amount.number == Decimal("2555.06") | |
# Test the details of the transaction | |
assert entries[1].payee == "CGD" | |
assert entries[1].narration == "Comissao Imposto Selo" | |
assert entries[1].postings[0].account == account_target | |
# Test the meta information | |
assert entries[1].postings[0].meta["source_desc"] == "IMPOSTO SELO S COMISS" | |
# Tes the price and category translation | |
assert entries[1].postings[1].account == "Expenses:Banking:Fees" | |
assert entries[1].postings[1].units.number == Decimal("0.01") | |
""" | |
Tests file account | |
Right now its the file account given | |
""" | |
def test_file_account(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
assert importer.file_account(file) == account_target | |
assert not importer.file_account(file) == "Bananas" | |
""" | |
Tests date | |
Date is the extracted from the date where the file is generated, not | |
the interval of the extract | |
""" | |
def test_file_date(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
assert importer.file_date(file) == datetime( | |
year = 2023, | |
month = 2, | |
day = 20 ) | |
""" | |
Tests the final file name of the system | |
Should be account-from-start-date-to-end-date | |
""" | |
def test_file_name(tmp_file): | |
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding) | |
tmp_file.write_text( | |
dedent( | |
""" | |
Consultar saldos e movimentos à ordem - 20-02-2023 | |
Conta ;1234567890123 - EUR - Conta Extracto | |
Data de início ;01-12-2022 | |
Data de fim ;31-12-2022 | |
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ; | |
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ; | |
""" | |
).strip() | |
) | |
with tmp_file.open() as file: | |
assert importer.file_name(file) == "{acc}-from-2022-12-01-to-2022-12-31.csv".format( acc = account_target.replace(":", "") ) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment