Created
July 12, 2023 12:01
-
-
Save jordigg/3ede57b1dbe08167f73004ef9607cc91 to your computer and use it in GitHub Desktop.
Download expense attachment PDFs from the Holded ERP API. Expenses are stored in a folder structure by year > month > invoice.pdf, and a CSV file is generated to log the progress and the files exported.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import binascii
import csv
import json
import os
import re
from datetime import datetime

import requests
from tqdm import tqdm
# CSV log with one row per document already handled. main() reads the first
# column (doc_id) on start-up so documents logged by an earlier run are skipped.
processed_docs_file = 'processed_docs.csv'
def sanitize_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    Only word characters, hyphens and single spaces are kept. Runs of
    whitespace (including tabs and newlines) are collapsed to one space so
    that control characters never end up in a path, and leading/trailing
    whitespace is removed.

    Args:
        filename: The raw name. ``None`` is treated as an empty name and any
            other non-string value is converted with ``str()``.

    Returns:
        The sanitized name; may be an empty string.
    """
    if filename is None:
        return ''
    # Drop everything that is not a word character, whitespace or a hyphen.
    kept = re.sub(r'[^\w\s-]', '', str(filename))
    # Normalise the remaining whitespace: "\s" also matches "\n" and "\t".
    return re.sub(r'\s+', ' ', kept).strip()
def main():
    """Export the PDF attachment of every Holded purchase document in range.

    Lists the purchase documents between ``start_timestamp`` and
    ``end_timestamp``, then requests the PDF of each document whose id is not
    yet in ``processed_docs_file``. Each PDF is written to
    ``expenses/<year>/<month>/<doc_id> <contact name>.pdf`` and one row per
    handled document is appended to ``processed_docs_file`` with the status
    ``Downloaded`` or ``Attachment missing``.

    Documents whose download fails (network error, non-200 status, malformed
    JSON or Base64) are reported on stdout and NOT logged, so the next run
    tries them again.
    """
    # API token for authentication
    api_token = 'YOUR_API_TOKEN_HERE'

    # API endpoint for listing purchase documents
    list_documents_url = 'https://api.holded.com/api/invoicing/v1/documents/purchase'

    # Export window as Unix timestamps
    start_timestamp = '1514761200'  # 2017-12-31 23:00:00 UTC (01/01/2018 00:00 CET)
    end_timestamp = '1704063600'  # 2023-12-31 23:00:00 UTC (01/01/2024 00:00 CET)

    headers = {
        'accept': 'application/json',
        'key': api_token,
    }

    # Seconds to wait on each HTTP request; without it a stalled connection
    # would block the export forever.
    request_timeout = 30

    # Ids already logged by a previous run. A set gives O(1) membership tests;
    # blank rows are ignored. The header cell 'doc_id' lands in the set too,
    # which is harmless.
    processed_ids = set()
    if os.path.exists(processed_docs_file):
        with open(processed_docs_file, 'r', newline='') as f:
            processed_ids = {row[0] for row in csv.reader(f) if row}
    else:
        with open(processed_docs_file, 'w', newline='') as f:
            csv.writer(f).writerow(['doc_id', 'status', 'doc_number', 'date',
                                    'contact_name', 'desc', 'filename', 'file_path'])

    # List the documents
    try:
        response = requests.get(
            list_documents_url,
            headers=headers,
            params={'starttmp': start_timestamp, 'endtmp': end_timestamp},
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        print(f'Error listing documents: {exc}')
        return

    if response.status_code != 200:
        print(f'Error listing documents: {response.status_code}')
        return

    documents = json.loads(response.text)

    for doc in tqdm(documents, desc="Processing records"):
        doc_id = doc['id']

        # Skip early: nothing below is needed for a document already logged.
        if doc_id in processed_ids:
            continue

        date = datetime.fromtimestamp(doc['date'])
        directory_path = os.path.join(
            'expenses', str(date.year), str(date.month).zfill(2))

        # The contact name may be absent or null; fall back to an empty name.
        contact_name = sanitize_filename(doc.get('contactName') or '')
        filename = f'{doc_id} {contact_name}'.strip() + '.pdf'
        file_path = os.path.join(directory_path, filename)

        # Download the PDF
        get_pdf_url = f'https://api.holded.com/api/invoicing/v1/documents/purchase/{doc_id}/pdf'
        try:
            pdf_response = requests.get(
                get_pdf_url, headers=headers, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f'Error downloading PDF for document {doc_id}: {exc}')
            continue

        if pdf_response.status_code != 200:
            print(
                f'Error downloading PDF for document {doc_id}: {pdf_response.status_code}')
            continue

        try:
            pdf_data = json.loads(pdf_response.text)
        except json.JSONDecodeError as exc:
            print(f'Error parsing PDF response for document {doc_id}: {exc}')
            continue

        # The PDF is expected Base64-encoded under the 'data' key. A missing
        # or empty value is treated as "no attachment" rather than being
        # written out as a zero-byte file.
        encoded_pdf = pdf_data.get('data') if isinstance(pdf_data, dict) else None
        if encoded_pdf:
            try:
                pdf_bytes = base64.b64decode(encoded_pdf)
            except binascii.Error as exc:
                print(f'Error decoding PDF for document {doc_id}: {exc}')
                continue
            # Create the month directory only when there is a file to store.
            os.makedirs(directory_path, exist_ok=True)
            with open(file_path, 'wb') as pdf_file:
                pdf_file.write(pdf_bytes)
            status = "Downloaded"
        else:
            filename = ""
            file_path = ""
            status = "Attachment missing"

        # Log the document so later runs skip it
        with open(processed_docs_file, 'a', newline='') as f:
            csv.writer(f).writerow([
                doc_id, status, doc.get('docNumber'), date.strftime('%d/%m/%Y'),
                doc.get('contactName'), doc.get('desc'), filename, file_path,
            ])
        # Guards against the same id appearing twice in one listing.
        processed_ids.add(doc_id)
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment