Skip to content

Instantly share code, notes, and snippets.

@alfredopalhares
Last active February 28, 2023 21:16
Show Gist options
  • Save alfredopalhares/3458177471df0e4991d02b2fea87f41b to your computer and use it in GitHub Desktop.
Save alfredopalhares/3458177471df0e4991d02b2fea87f41b to your computer and use it in GitHub Desktop.
Beancount Importer Module for the Caixa Geral Depositos Bank
""" I need Core Number D for decimal values """
from beancount.core.number import Decimal
from beancount.ingest import importer
from beancount.core import account
from beancount.core import amount
from beancount.core import flags
from beancount.core import data
from beancount.core.position import Cost
import csv
import logging
import os
import re
import sys
import warnings
from datetime import datetime, timedelta
## Caixa Geral de Depositos Importer
# This importer takes the following parameters:
# - account_desc = The account number that is explained on the header of the CSV file so
# "0788001977900" so this will properly match the account
# - target_asset = The target asset to import these transactions too
class CGDImporter(importer.ImporterProtocol):
def __init__(self, account_desc, account_target, doc_encoding = "latin1"):
self.account_desc = account_desc
self.account_target = account_target
self.doc_encoding = doc_encoding
def name(self):
return "Importer Caixa Geral de Depositos CSV"
def identify(self, file):
proper_file = False
if re.match("comprovativo.*.csv", os.path.basename(file.name)):
with open(file.name, encoding=self.doc_encoding) as csv_file:
for index, line in enumerate(csv_file):
if line.__contains__(self.account_desc):
print("File {f} matches account {a}".format(f = os.path.basename(file.name), a = self.account_desc))
proper_file = True
if line.__contains__("Data de início") or line.__contains__("Data de fim"):
print(line.strip("\n")) # The CSV Sometimes have newlines
#TODO: Actually check the fieldnames that match the file
return proper_file
"""
Extracts the information for the Given CSV File
Opens the file and imports the transactions
Adds the original descrito a meta field called source_desc
Adds a Balance operation directly exctracted from the CSV Transaction:
Uses a transaction instead of the last file, since the last entry cointans the balance
at the date the document generated, which might not the the interval of data requested
Currently there is not data deduplication
"""
def extract(self, file):
balance_added = False
with open(file.name, encoding="latin1") as csv_file:
# Skiping the first 7 seven lines just easeier than to parse for now
for _ in range(7):
next(csv_file)
# Headers to Parse
csv_fieldnames =["Data mov.", "Data Valor", "Descrição", "Débito", "Crédito", "Saldo contabilístico", "Saldo disponível" "Categoria"]
entries = [] # Final entries array
for index, row in enumerate(csv.DictReader(csv_file, delimiter=";", fieldnames=csv_fieldnames)):
logging.debug(row)
# Getting the Date and Description
# Last line has the balance
if (row["Data Valor"].isspace()):
logging.debug("No more data found")
break
trans_date = datetime.strptime(row["Data mov."], "%d-%m-%Y")
trans_desc = row["Descrição"].strip()
meta = data.new_metadata(os.path.basename(file.name), index)
txn_debit = self.__convertToDecimal(row["Débito"])
txn_credit = self.__convertToDecimal(row["Crédito"])
if not balance_added:
balance = self.__convertToDecimal(row["Saldo contabilístico"])
print(balance)
entries.append(data.Balance(
meta = meta,
date = trans_date.date() + timedelta(days=1), # Beancount takes balance at the beginning of the day
account = self.account_target,
amount = amount.Amount(balance, "EUR"),
tolerance = None,
diff_amount = None,
))
balance_added = True
if txn_debit > txn_credit:
is_expense = True
else:
is_expense = False
txn_details = self.__decodeDesc(desc = trans_desc, is_expense = is_expense)
txn = data.Transaction(
meta = meta,
date = trans_date.date(),
payee = txn_details["payee"],
flag = "txn",
tags = set(),
links = set(),
narration = txn_details["narration"],
postings = []
)
# TODO: Improve this logic
if is_expense:
txn.postings.append(
data.Posting(
account = self.account_target,
units = amount.Amount(-1*txn_debit, "EUR"),
cost = None,
price = None,
flag = None,
meta = dict({ "source_desc": trans_desc })
)
)
txn.postings.append(
data.Posting(
account = txn_details["category"],
units = amount.Amount(txn_debit, "EUR"),
cost = None,
price = None,
flag = flags.FLAG_WARNING,
meta = None
)
)
else:
txn.postings.append(
data.Posting(
account = txn_details["category"],
units = amount.Amount(-1 * txn_credit, "EUR"),
cost = None,
price = None,
flag = flags.FLAG_WARNING,
meta = None
)
)
txn.postings.append(
data.Posting(
account = self.account_target,
units = amount.Amount( txn_credit, "EUR"),
cost = None,
price = None,
flag = None,
meta = dict({ "source_desc": trans_desc }) # TODO: Add final file name
)
)
entries.append(txn)
return entries
"""
Returns a string with the relative path where the import file should be
Right now it just returns the target account
"""
def file_account(self, file):
return self.account_target
"""
Returns a data to be used in file name
This is the date where the file was generated, not the date
of the extract
"""
def file_date(self, file):
with open(file.name, encoding=self.doc_encoding) as csv_file:
for line in csv_file:
if line.__contains__("Consultar saldos e movimentos à ordem"):
header = line.strip("\n").split("-")
date = datetime(year = int(header[3]),
month = int(header[2]),
day = int(header[1]))
print("Date extracted: {d}".format( d = date ))
return date
return None
"""
Retuns the final file descriptions
Currently reads the date of beginning and the end and adds them
"""
def file_name(self, file):
_, file_extension = os.path.splitext(os.path.basename(file.name))
with open(file.name, encoding=self.doc_encoding) as csv_file:
for line in csv_file:
if line.__contains__("Data de início"):
from_date_string = line.split(";")[1].strip()
from_date = datetime.strptime(from_date_string, "%d-%m-%Y")
if line.__contains__("Data de fim"):
end_date_string = line.split(";")[1].strip()
end_date = datetime.strptime(end_date_string, "%d-%m-%Y")
return "{acc}-from-{fd}-to-{ed}{ext}".format(
acc = self.account_target.replace(":",""),
fd = from_date.strftime("%Y-%m-%d"),
ed = end_date.strftime("%Y-%m-%d"),
ext = file_extension)
"""
Numbers on the CSV are 12.456,78 so we need to replace the dot and comas
Uses the built in number present in beancount/core/number.py
"""
def __convertToDecimal(self, value):
if value is None or value == "" or value.isspace():
return Decimal()
value_no_dot = value.replace(".","")
value_no_comma = value_no_dot.replace(",",".")
return Decimal(value_no_comma)
## Tries to decode the description of some known transactions, such as fees
# and other common payments, when It does not it Uses
# Expenses:Fixme and Income:
def __decodeDesc(self, desc, is_expense = True):
if is_expense:
un_category = "Expenses:Fixme"
else:
un_category = "Income:Fixme"
if desc.__contains__("IMPOSTO SELO S COMISS"):
return {
"payee" : "CGD",
"narration" : "Comissao Imposto Selo",
"category" : "Expenses:Banking:Fees"
}
elif desc.__contains__("COMISSAO COMPRAS FORA"):
return {
"payee" : "CGD",
"narration" : "Comissao Compras Fora",
"category" : "Expenses:Banking:Fees"
}
elif desc.__contains__("COMISSAO CONTA CAIXA"):
return {
"payee" : "CGD",
"narration" : "Comissao Conta Caixa",
"category" : "Expenses:Banking:Fees"
}
elif desc.__contains__("COMPRA SUPERMERCADO F") or desc.__contains__("COMPRA SUPERMERCADO D"):
return {
"payee" : "Supermecado Freixo",
"narration" : "Compras Semanais",
"category" : "Expenses:Food:Groceries"
}
elif desc.__contains__("COMPRA CONTINENTE PT"):
return {
"payee" : "Continente Ponte de Lima",
"narration" : "Compras Semanais",
"category" : "Expenses:Food:Groceries"
}
elif desc.__contains__("DespesasAnuaisPoupanc"):
return {
"payee" : "CGD",
"narration" : "Poupanca para Despesas Anuais",
"category" : "Assets:CGD:ContaPoupanca"
}
elif desc.__contains__("Renda Cabacos Mensal"):
return {
"payee" : "Cabacos",
"narration" : "Renda Mensal",
"category" : "Expenses:House:Rent"
}
elif desc.__contains__("EDP COMERCIAL COMERCI"):
return {
"payee" : "EDP",
"narration" : "Electricidade",
"category" : "Expenses:House:Utilities"
}
elif desc.__contains__("MEO SA"):
return {
"payee" : "MEO",
"narration" : "Mensalidade Internet",
"category" : "Expenses:House:Utilities"
}
elif desc.__contains__("eToro Funding"):
return {
"payee" : "eToro",
"narration" : "Funding eToro Account",
"category" : "Assets:Brokers:Etoro:EToroMoney"
}
elif desc.__contains__("COMPRA ALTADONNA"):
return {
"payee" : "Altadonna",
"narration" : "Pizza Familiar",
"category" : "Expenses:Food:EatingOut"
}
elif desc.__contains__("COMPRA GOOGLE YOUTUBE"):
return {
"payee" : "Youtube",
"narration" : "Mensalidade Premium",
"category" : "Expenses:Subscriptions:Streaming"
}
elif desc.__contains__("COMPRA DISNEY PLUS"):
return {
"payee" : "Disney Plus",
"narration" : "Streaming Disney Plus",
"category" : "Expenses:Subscriptions:Streaming"
}
elif desc.__contains__("COMPRA NETFLIX COM"):
return {
"payee" : "Netflix",
"narration" : "Mensalidade Netflix",
"category" : "Expenses:Subscriptions:Streaming"
}
elif desc.__contains__("COMPRA DIGITALOCEAN C"):
return {
"payee" : "Digital Ocean",
"narration" : "Mensalidade Servidores",
"category" : "Expenses:Subscriptions:Servers"
}
elif desc.__contains__("COMPRA GOOGLE GSUITE"):
return {
"payee" : "Google",
"narration" : "Mensalidade Google Suite",
"category" : "Expenses:Subscriptions:Servers"
}
elif desc.__contains__("COMPRA CARLA LISETE T "):
return {
"payee" : "Carla Esticista",
"narration" : "Depilacao",
"category" : "Expenses:SelfCare"
}
elif desc.__contains__("PPRO Payment Services"):
return {
"payee" : "Barkyn",
"narration" : "Racao Lara e Steve",
"category" : "Expenses:Pets:Food"
}
else:
return {
"payee" : "",
"narration" : desc,
"category" : un_category
}
#!/usr/bin/env python3
# You need this to load the importers on the current
# directory, you should learn more about the path
import sys
from os import path
sys.path.insert(0, path.join(path.dirname(__file__)))
from importers import CGD
CONFIG = [
CGD.Importer(account_desc="<acc_number>",
account_target="Assets:CGD:ContaCorrente")
]
from datetime import date, datetime, timedelta
from decimal import Decimal
from textwrap import dedent
import pytest
from beancount.core.data import Balance, Transaction
from importers.CGD import CGDImporter
account_id = "1234567890123" # Account ID to be identified
account_target = "Assets:CGD:ContaCorrente"
tmp_file_encoding = "utf-8" # Pytest temporary files are utf-8
@pytest.fixture
def tmp_file(tmp_path):
return tmp_path / "comprovativo.csv"
"""
Tests the module identify function by adding a file wih the name and just
having a random text on the file
"""
def test_identify_not_correct(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text("Not a valid Bank file sure!")
with tmp_file.open() as file:
assert not importer.identify(file)
"""
Tests the identification of file
The file is a correct file
Also tests if but its for another account
"""
def test_identify_empty_file(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
dumb_importer = CGDImporter(account_desc = "666666666" , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
assert importer.identify(file)
assert not dumb_importer.identify(file)
"""
Tests the extraction of an empty file
"""
def test_extract_empty_file(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
entries = importer.extract(file)
assert len(entries) == 0
"""
Tests the file identification correctly
checks that the given account number matches
"""
def test_extract_multiple_transactions(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
12-12-2022;10-12-2022;IMPOSTO SELO S COMISS ;0,01;;2.555,06;2.555,06;LEVANTAMENTOS ;
12-12-2022;10-12-2022;COMPRA GOOGLE YOUTUBE ;8,49;;2.555,07;2.555,07;COMPRAS ;
03-12-2022;30-11-2022;IMPOSTO SELO S COMISS ;0,01;;1.998,85;1.998,85;LEVANTAMENTOS ;
03-12-2022;30-11-2022;COMISSAO COMPRAS FORA ;0,29;;1.998,86;1.998,86;LEVANTAMENTOS ;
03-12-2022;03-12-2022;COMISSAO CONTA CAIXA ;4,16;;2.008,86;1.999,15;Diversos ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
entries = importer.extract(file)
# There should be 6 entries, 5 transactions and one balance operation
assert len(entries) == 6
# The first entry is a balance enrty
# Assuming the value conversiton
assert entries[0].amount.number == Decimal("2555.06")
# Test the details of the transaction
assert entries[1].payee == "CGD"
assert entries[1].narration == "Comissao Imposto Selo"
assert entries[1].postings[0].account == account_target
# Test the meta information
assert entries[1].postings[0].meta["source_desc"] == "IMPOSTO SELO S COMISS"
# Tes the price and category translation
assert entries[1].postings[1].account == "Expenses:Banking:Fees"
assert entries[1].postings[1].units.number == Decimal("0.01")
"""
Tests file account
Right now its the file account given
"""
def test_file_account(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
assert importer.file_account(file) == account_target
assert not importer.file_account(file) == "Bananas"
"""
Tests date
Date is the extracted from the date where the file is generated, not
the interval of the extract
"""
def test_file_date(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
assert importer.file_date(file) == datetime(
year = 2023,
month = 2,
day = 20 )
"""
Tests the final file name of the system
Should be account-from-start-date-to-end-date
"""
def test_file_name(tmp_file):
importer = CGDImporter(account_desc = account_id , account_target = account_target, doc_encoding=tmp_file_encoding)
tmp_file.write_text(
dedent(
"""
Consultar saldos e movimentos à ordem - 20-02-2023
Conta ;1234567890123 - EUR - Conta Extracto
Data de início ;01-12-2022
Data de fim ;31-12-2022
Data mov. ;Data valor ;Descrição ;Débito ;Crédito ;Saldo contabilístico ;Saldo disponível ;Categoria ;
; ; ; ;Saldo contabilístico ;2.095,91 EUR ; ; ;
"""
).strip()
)
with tmp_file.open() as file:
assert importer.file_name(file) == "{acc}-from-2022-12-01-to-2022-12-31.csv".format( acc = account_target.replace(":", "") )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment