jordigg · July 12, 2023 12:01
diff --git a/expense_download.py b/expense_download.py
 import csv
 import requests
 import json
 import base64
 import os
 from datetime import datetime
 import re
 from tqdm import tqdm

 # CSV log of documents already handled: main() reads it at start-up to skip
 # known document IDs and appends one row per newly processed document.
 processed_docs_file = 'processed_docs.csv'


 def sanitize_filename(filename: str) -> str:
    """Return *filename* reduced to word characters, spaces and hyphens.

    Every character that is not a word character, whitespace or a hyphen is
    removed. Whitespace other than a plain space (tab, newline, ...) is then
    turned into a space so the result never contains control whitespace, and
    leading/trailing whitespace is stripped. The result may be empty.
    """
    # Remove any character that isn't a word character, whitespace, or hyphen
    cleaned = re.sub(r'[^\w\s-]', '', filename)
    # \s above also lets tabs and newlines through; those are not wanted in a
    # file name, so map every non-space whitespace character to a space
    return re.sub(r'[^\S ]', ' ', cleaned).strip()


 def main():
    """Download the PDF of every purchase document not yet logged as processed.

    Lists purchase documents from the Holded invoicing API for a fixed
    timestamp range. For each document whose ID is not already present in
    ``processed_docs_file`` it requests the PDF, saves it under
    ``expenses/<year>/<month>/`` and appends a row to the CSV log. Documents
    whose PDF response has no ``data`` field are logged with the status
    "Attachment missing". Failed requests are printed and the document is
    left unlogged, so a later run tries it again.
    """
    # API token for authentication
    api_token = 'YOUR_API_TOKEN_HERE'

    # Define the API endpoint for listing documents
    list_documents_url = 'https://api.holded.com/api/invoicing/v1/documents/purchase'

    # Define the start and end timestamps
    start_timestamp = '1514761200'  # Unix time 01/01/2018
    end_timestamp = '1704063600'  # Unix time 31/12/2023

    # Define the headers for the API request
    headers = {
        'accept': 'application/json',
        'key': api_token,
    }

    # Seconds before a request is abandoned; without a timeout a stalled
    # connection would block the script indefinitely
    request_timeout = 30

    # Load the IDs of already processed documents into a set for fast lookups
    processed_ids = set()
    if os.path.exists(processed_docs_file):
        with open(processed_docs_file, 'r', newline='') as f:
            # Blank lines are parsed as empty rows and carry no ID
            processed_ids = {row[0] for row in csv.reader(f) if row}
    else:
        with open(processed_docs_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['doc_id', 'status', 'doc_number', 'date',
                             'contact_name', 'desc', 'filename', 'file_path'])

    # Make the API request to list the documents
    try:
        response = requests.get(
            list_documents_url,
            headers=headers,
            params={'starttmp': start_timestamp, 'endtmp': end_timestamp},
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        print(f'Error listing documents: {exc}')
        return

    if response.status_code != 200:
        # Print an error message if the API request was unsuccessful
        print(f'Error listing documents: {response.status_code}')
        return

    documents = response.json()

    for doc in tqdm(documents, desc="Processing records"):
        doc_id = doc['id']

        # The CSV log only holds strings, so compare on the string form
        doc_key = str(doc_id)
        if doc_key in processed_ids:
            continue

        # Convert the date to a datetime object
        date = datetime.fromtimestamp(doc['date'])

        # Define the API endpoint for downloading the PDF
        get_pdf_url = f'https://api.holded.com/api/invoicing/v1/documents/purchase/{doc_id}/pdf'

        # One unreachable document must not abort the whole batch
        try:
            pdf_response = requests.get(
                get_pdf_url, headers=headers, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f'Error downloading PDF for document {doc_id}: {exc}')
            continue

        if pdf_response.status_code != 200:
            print(
                f'Error downloading PDF for document {doc_id}: {pdf_response.status_code}')
            continue

        pdf_data = pdf_response.json()

        if 'data' in pdf_data:
            # Create the year/month directory only when there is a file to
            # store, so skipped documents leave no empty directories behind
            directory_path = os.path.join(
                'expenses', str(date.year), str(date.month).zfill(2))
            os.makedirs(directory_path, exist_ok=True)

            # A missing or null contact name must not break the file name
            contact_name = sanitize_filename(str(doc.get('contactName') or ''))
            filename = f'{doc_id} {contact_name}'.strip() + '.pdf'
            file_path = os.path.join(directory_path, filename)

            # The PDF arrives Base64-encoded inside the JSON payload
            with open(file_path, 'wb') as pdf_file:
                pdf_file.write(base64.b64decode(pdf_data['data']))

            status = "Downloaded"
        else:
            filename = ""
            file_path = ""
            status = "Attachment missing"

        # Write the document details to the CSV file
        with open(processed_docs_file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([doc_id, status, doc.get('docNumber'),
                             date.strftime('%d/%m/%Y'), doc.get('contactName'),
                             doc.get('desc'), filename, file_path])

        # Remember the ID so a duplicate entry in the listing is not re-fetched
        processed_ids.add(doc_key)


 # Run the download only when this file is executed as a script, not when it
 # is imported as a module.
 if __name__ == '__main__':
    main()
	import csv
	import requests
	import json
	import base64
	import os
	from datetime import datetime
	import re
	from tqdm import tqdm

	# CSV log of documents already handled: main() reads it at start-up to skip
	# known document IDs and appends one row per newly processed document.
	processed_docs_file = 'processed_docs.csv'


	def sanitize_filename(filename: str) -> str:
	    """Return *filename* reduced to word characters, spaces and hyphens.

	    Every character that is not a word character, whitespace or a hyphen is
	    removed. Whitespace other than a plain space (tab, newline, ...) is then
	    turned into a space so the result never contains control whitespace, and
	    leading/trailing whitespace is stripped. The result may be empty.
	    """
	    # Remove any character that isn't a word character, whitespace, or hyphen
	    cleaned = re.sub(r'[^\w\s-]', '', filename)
	    # \s above also lets tabs and newlines through; those are not wanted in a
	    # file name, so map every non-space whitespace character to a space
	    return re.sub(r'[^\S ]', ' ', cleaned).strip()


	def main():
	    """Download the PDF of every purchase document not yet logged as processed.

	    Lists purchase documents from the Holded invoicing API for a fixed
	    timestamp range. For each document whose ID is not already present in
	    ``processed_docs_file`` it requests the PDF, saves it under
	    ``expenses/<year>/<month>/`` and appends a row to the CSV log. Documents
	    whose PDF response has no ``data`` field are logged with the status
	    "Attachment missing". Failed requests are printed and the document is
	    left unlogged, so a later run tries it again.
	    """
	    # API token for authentication
	    api_token = 'YOUR_API_TOKEN_HERE'

	    # Define the API endpoint for listing documents
	    list_documents_url = 'https://api.holded.com/api/invoicing/v1/documents/purchase'

	    # Define the start and end timestamps
	    start_timestamp = '1514761200'  # Unix time 01/01/2018
	    end_timestamp = '1704063600'  # Unix time 31/12/2023

	    # Define the headers for the API request
	    headers = {
	        'accept': 'application/json',
	        'key': api_token,
	    }

	    # Seconds before a request is abandoned; without a timeout a stalled
	    # connection would block the script indefinitely
	    request_timeout = 30

	    # Load the IDs of already processed documents into a set for fast lookups
	    processed_ids = set()
	    if os.path.exists(processed_docs_file):
	        with open(processed_docs_file, 'r', newline='') as f:
	            # Blank lines are parsed as empty rows and carry no ID
	            processed_ids = {row[0] for row in csv.reader(f) if row}
	    else:
	        with open(processed_docs_file, 'w', newline='') as f:
	            writer = csv.writer(f)
	            writer.writerow(['doc_id', 'status', 'doc_number', 'date',
	                             'contact_name', 'desc', 'filename', 'file_path'])

	    # Make the API request to list the documents
	    try:
	        response = requests.get(
	            list_documents_url,
	            headers=headers,
	            params={'starttmp': start_timestamp, 'endtmp': end_timestamp},
	            timeout=request_timeout,
	        )
	    except requests.RequestException as exc:
	        print(f'Error listing documents: {exc}')
	        return

	    if response.status_code != 200:
	        # Print an error message if the API request was unsuccessful
	        print(f'Error listing documents: {response.status_code}')
	        return

	    documents = response.json()

	    for doc in tqdm(documents, desc="Processing records"):
	        doc_id = doc['id']

	        # The CSV log only holds strings, so compare on the string form
	        doc_key = str(doc_id)
	        if doc_key in processed_ids:
	            continue

	        # Convert the date to a datetime object
	        date = datetime.fromtimestamp(doc['date'])

	        # Define the API endpoint for downloading the PDF
	        get_pdf_url = f'https://api.holded.com/api/invoicing/v1/documents/purchase/{doc_id}/pdf'

	        # One unreachable document must not abort the whole batch
	        try:
	            pdf_response = requests.get(
	                get_pdf_url, headers=headers, timeout=request_timeout)
	        except requests.RequestException as exc:
	            print(f'Error downloading PDF for document {doc_id}: {exc}')
	            continue

	        if pdf_response.status_code != 200:
	            print(
	                f'Error downloading PDF for document {doc_id}: {pdf_response.status_code}')
	            continue

	        pdf_data = pdf_response.json()

	        if 'data' in pdf_data:
	            # Create the year/month directory only when there is a file to
	            # store, so skipped documents leave no empty directories behind
	            directory_path = os.path.join(
	                'expenses', str(date.year), str(date.month).zfill(2))
	            os.makedirs(directory_path, exist_ok=True)

	            # A missing or null contact name must not break the file name
	            contact_name = sanitize_filename(str(doc.get('contactName') or ''))
	            filename = f'{doc_id} {contact_name}'.strip() + '.pdf'
	            file_path = os.path.join(directory_path, filename)

	            # The PDF arrives Base64-encoded inside the JSON payload
	            with open(file_path, 'wb') as pdf_file:
	                pdf_file.write(base64.b64decode(pdf_data['data']))

	            status = "Downloaded"
	        else:
	            filename = ""
	            file_path = ""
	            status = "Attachment missing"

	        # Write the document details to the CSV file
	        with open(processed_docs_file, 'a', newline='') as f:
	            writer = csv.writer(f)
	            writer.writerow([doc_id, status, doc.get('docNumber'),
	                             date.strftime('%d/%m/%Y'), doc.get('contactName'),
	                             doc.get('desc'), filename, file_path])

	        # Remember the ID so a duplicate entry in the listing is not re-fetched
	        processed_ids.add(doc_key)


	# Run the download only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == '__main__':
	    main()
No results found