Skip to content

Instantly share code, notes, and snippets.

@vega113
Created April 25, 2023 12:37
Show Gist options
  • Save vega113/10d4450a408d46db48e35654ecc53304 to your computer and use it in GitHub Desktop.
Save vega113/10d4450a408d46db48e35654ecc53304 to your computer and use it in GitHub Desktop.
import os
import sys

import pandas as pd
import requests
def download_expense(link, file_name, folder):
    """Download the file at *link* and store it as ``folder/file_name``.

    The destination folder is created on demand. The file is written only
    when the server answers with HTTP 200; on any other status, or when the
    request itself fails, a failure message is printed and nothing is written.

    Args:
        link: URL handed to ``requests.get``.
        file_name: Name of the file to create inside *folder*.
        folder: Destination directory.

    Returns:
        True if the response was written to disk, False otherwise.
    """
    try:
        # Without a timeout a single unresponsive server blocks the whole run.
        response = requests.get(link, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to download file from {link}: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to download file from {link}")
        return False

    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, file_name), 'wb') as f:
        f.write(response.content)
    return True
def _safe_name_part(value):
    """Return *value* as text that can be embedded in a file name."""
    text = str(value).replace('.', '_')
    # A path separator in a cell would otherwise send the file to another directory.
    return text.replace('/', '_').replace('\\', '_')


def _document_date(row):
    """Return the first usable date from positions 4 then 3 of *row*, else None."""
    for position in (4, 3):
        try:
            parsed = pd.to_datetime(row.iloc[position])
        except (ValueError, TypeError):
            continue
        # Empty cells parse to NaT without raising; NaT cannot be formatted.
        if not pd.isna(parsed):
            return parsed
    return None


def parse_and_download_expenses(file_path):
    """Download the document linked from every row of an expenses spreadsheet.

    Cells are read by zero-based position: 4 is the document date (3 is used
    when 4 holds no valid date), 9 the expense description, 6 the total amount
    and 18 the download link. Each file is saved as
    ``Expenses/<YYYY-MM>/<YYYY-MM-DD>_<description>_<amount>.jpg``.

    Rows without a valid date or without a link are reported and skipped.

    Args:
        file_path: Path of the Excel file passed to ``pd.read_excel``.
    """
    df = pd.read_excel(file_path)
    root_folder = "Expenses"
    for index, row in df.iterrows():
        document_date = _document_date(row)
        if document_date is None:
            print(f"Skipping row {index}: no valid document date")
            continue

        link = row.iloc[18]
        if pd.isna(link):
            print(f"Skipping row {index}: no download link")
            continue

        # str() first: an empty description cell is a float NaN, not a string.
        expense_description = _safe_name_part(row.iloc[9])
        total_amount = _safe_name_part(row.iloc[6])
        file_name = f"{document_date.strftime('%Y-%m-%d')}_{expense_description}_{total_amount}.jpg"
        folder = os.path.join(root_folder, document_date.strftime('%Y-%m'))
        download_expense(link, file_name, folder)
        # Confirm only when the file is really on disk, so a failed download
        # is not announced as a success.
        if os.path.isfile(os.path.join(folder, file_name)):
            print(f"Downloaded: {file_name} to folder {folder}")
if __name__ == "__main__":
    # The spreadsheet path is taken from the first command-line argument;
    # without one the placeholder below is used, so edit it or pass a path.
    file_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/your/xlsx/file.xlsx"
    parse_and_download_expenses(file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment