Created
December 19, 2024 10:54
-
-
Save duartefdias/3d8ebaed70cd5da41431df7b604d8505 to your computer and use it in GitHub Desktop.
Convert excel file into csv keeping formulas (for use with LLMs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openpyxl | |
import csv | |
def excel_to_csv_with_formulas(excel_path, csv_path, sheet_index=None): | |
""" | |
Convert Excel file to CSV, preserving formulas. | |
Args: | |
excel_path: Path to input Excel file | |
csv_path: Path to output CSV file | |
sheet_index: Optional index of specific sheet to convert (0-based). | |
If None, converts all sheets. | |
""" | |
# Load the workbook | |
wb = openpyxl.load_workbook(excel_path, data_only=False) | |
# Prepare to write to CSV | |
with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile: | |
csvwriter = csv.writer(csvfile) | |
if sheet_index is not None: | |
# Convert single sheet | |
if sheet_index >= len(wb.worksheets): | |
raise ValueError(f"Sheet index {sheet_index} is out of range. File has {len(wb.worksheets)} sheets.") | |
ws = wb.worksheets[sheet_index] | |
csvwriter.writerow([f'----SHEET {sheet_index + 1}: {ws.title}----']) | |
for row in ws.iter_rows(): | |
csv_row = [] | |
for cell in row: | |
if cell.data_type == 'f': | |
csv_row.append(f'={cell.value}') | |
else: | |
csv_row.append(cell.value) | |
csvwriter.writerow(csv_row) | |
print(f"Converted sheet {sheet_index + 1} ({ws.title}) from {excel_path} to {csv_path}") | |
else: | |
# Convert all sheets | |
for sheet_index, ws in enumerate(wb.worksheets): | |
csvwriter.writerow([f'----SHEET {sheet_index + 1}: {ws.title}----']) | |
for row in ws.iter_rows(): | |
csv_row = [] | |
for cell in row: | |
if cell.data_type == 'f': | |
csv_row.append(f'={cell.value}') | |
else: | |
csv_row.append(cell.value) | |
csvwriter.writerow(csv_row) | |
# Add a blank line between sheets (except for the last sheet) | |
if sheet_index < len(wb.worksheets) - 1: | |
csvwriter.writerow([]) | |
print(f"Converted all sheets from {excel_path} to {csv_path}") | |
# Example usage | |
# Convert all sheets | |
excel_to_csv_with_formulas('input.xlsx', 'output.csv', sheet_index=3) | |
# Convert specific sheet (e.g., second sheet, index 1) | |
# excel_to_csv_with_formulas('input.xlsx', 'output.csv', sheet_index=1) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment