Created
March 19, 2025 07:55
-
-
Save bobir01/7e655d07f5e55f2518c06c749dfff662 to your computer and use it in GitHub Desktop.
A python utility to convert Excel files directly to images without using PDF as an intermediary Works across platforms without requiring additional dependencies like Poppler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import platform | |
import tempfile | |
from PIL import Image, ImageDraw, ImageFont | |
import openpyxl | |
from openpyxl.utils import get_column_letter | |
class ExcelToImage: | |
""" | |
A utility class to convert Excel files directly to images without using PDF as an intermediary | |
Works across platforms without requiring additional dependencies like Poppler | |
""" | |
def __init__(self, excel_file): | |
""" | |
Initialize with Excel file path | |
Parameters: | |
excel_file (str): Path to the Excel file | |
""" | |
self.excel_file = excel_file | |
self.system = platform.system() | |
def convert_with_win32com(self, output_file, sheet_name=None, image_format='PNG', dpi=300): | |
""" | |
Convert Excel to image using Windows COM interface (Windows only) | |
Parameters: | |
output_file (str): Path to save the output image | |
sheet_name (str, optional): Name of the sheet to convert | |
image_format (str): Image format (PNG, JPEG, etc.) | |
dpi (int): Image resolution in DPI | |
Returns: | |
str: Path to the generated image | |
""" | |
if self.system != 'Windows': | |
print("This method only works on Windows") | |
return None | |
try: | |
import win32com.client | |
# Get absolute path | |
abs_path = os.path.abspath(self.excel_file) | |
# Get Excel application | |
excel = win32com.client.Dispatch("Excel.Application") | |
excel.Visible = False | |
excel.DisplayAlerts = False | |
# Open workbook | |
wb = excel.Workbooks.Open(abs_path) | |
# Select sheet | |
if sheet_name: | |
try: | |
ws = wb.Sheets(sheet_name) | |
except: | |
print(f"Sheet '{sheet_name}' not found, using active sheet") | |
ws = wb.ActiveSheet | |
else: | |
ws = wb.ActiveSheet | |
# Ensure output directory exists | |
output_dir = os.path.dirname(output_file) | |
if output_dir and not os.path.exists(output_dir): | |
os.makedirs(output_dir) | |
# Export format constants | |
xlTypePDF = 0 | |
xlQualityStandard = 0 | |
# Create a temporary PDF file | |
temp_pdf = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False).name | |
# Export as PDF first (Excel doesn't directly support image export) | |
ws.ExportAsFixedFormat( | |
Type=xlTypePDF, | |
Filename=temp_pdf, | |
Quality=xlQualityStandard, | |
IncludeDocProperties=True, | |
IgnorePrintAreas=False, | |
OpenAfterPublish=False | |
) | |
# Close Excel | |
wb.Close(False) | |
excel.Quit() | |
# Now convert PDF to image using ImageMagick, Wand or PyMuPDF | |
result = self._convert_pdf_to_image(temp_pdf, output_file, image_format, dpi) | |
# Remove temporary PDF | |
if os.path.exists(temp_pdf): | |
os.remove(temp_pdf) | |
return result | |
except Exception as e: | |
print(f"Error in win32com conversion: {str(e)}") | |
return None | |
def _wrap_text(self, text, max_width, font): | |
""" | |
Wrap text to fit within the given width | |
Parameters: | |
text (str): Text to wrap | |
max_width (int): Maximum width in pixels | |
font (ImageFont): Font to use for text measurements | |
Returns: | |
list: List of wrapped text lines | |
""" | |
if not text: | |
return [""] | |
# Convert to string if not already | |
text = str(text) | |
# Split text into words | |
words = text.split() | |
if not words: | |
return [""] | |
# Start with the first word | |
lines = [words[0]] | |
current_line = 0 | |
for word in words[1:]: | |
# Calculate width of current line + new word | |
test_line = lines[current_line] + " " + word | |
text_width = font.getlength(test_line) | |
if text_width <= max_width: | |
# Word fits on current line | |
lines[current_line] = test_line | |
else: | |
# Word doesn't fit, start a new line | |
lines.append(word) | |
current_line += 1 | |
return lines | |
def convert_with_custom_renderer(self, output_file, sheet_name=None, image_format='PNG', max_width=3840, max_height=2160, scale_factor=2.0): | |
""" | |
Convert Excel to image using a custom pure Python renderer | |
Works on all platforms without external dependencies | |
Parameters: | |
output_file (str): Path to save the output image | |
sheet_name (str, optional): Name of the sheet to convert | |
image_format (str): Image format (PNG, JPEG, etc.) | |
max_width/max_height (int): Maximum dimensions for the output image | |
scale_factor (float): Scale factor for improving resolution (higher = better quality) | |
Returns: | |
str: Path to the generated image | |
""" | |
try: | |
# Load workbook | |
wb = openpyxl.load_workbook(self.excel_file) | |
# Select sheet | |
if sheet_name and sheet_name in wb.sheetnames: | |
ws = wb[sheet_name] | |
else: | |
ws = wb.active | |
# Determine dimensions | |
min_row, min_col = 1, 1 | |
max_row = ws.max_row | |
max_col = ws.max_column | |
# Apply scale factor for better quality | |
base_cell_width = 100 # Base width in pixels | |
base_cell_height = 20 # Base height in pixels | |
# Scale dimensions for better quality | |
cell_width = int(base_cell_width * scale_factor) | |
cell_height = int(base_cell_height * scale_factor) | |
header_height = cell_height * 2 # Make header row twice as tall | |
# Calculate image dimensions | |
img_width = min(max_width, cell_width * max_col) | |
# Account for taller header row in height calculation | |
img_height = header_height + cell_height * (max_row - 1) | |
img_height = min(max_height, img_height) | |
# Create a new image with white background | |
img = Image.new('RGB', (img_width, img_height), color='white') | |
draw = ImageDraw.Draw(img) | |
# Try to load a font with larger size for better quality | |
font = None | |
font_size = int(10 * scale_factor) # Scale font size | |
try: | |
font = ImageFont.truetype("arial.ttf", font_size) | |
except: | |
try: | |
font = ImageFont.truetype("DejaVuSans.ttf", font_size) | |
except: | |
try: | |
# Try system fonts | |
if self.system == 'Windows': | |
font = ImageFont.truetype("C:\\Windows\\Fonts\\Arial.ttf", font_size) | |
elif self.system == 'Darwin': # macOS | |
font = ImageFont.truetype("/Library/Fonts/Arial.ttf", font_size) | |
else: # Linux | |
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size) | |
except: | |
font = ImageFont.load_default() | |
# Draw grid and content | |
for row in range(min_row, max_row + 1): | |
# Use taller height for header row | |
row_height = header_height if row == 1 else cell_height | |
for col in range(min_col, max_col + 1): | |
# Get cell position | |
x1 = (col - min_col) * cell_width | |
# Calculate y-position based on previous rows | |
y1 = 0 | |
for r in range(min_row, row): | |
y1 += header_height if r == 1 else cell_height | |
x2 = x1 + cell_width | |
y2 = y1 + (header_height if row == 1 else cell_height) | |
# Draw cell borders with thicker lines for better visibility | |
border_width = max(1, int(scale_factor / 2)) # Scale border width | |
draw.rectangle([x1, y1, x2, y2], outline='gray', width=border_width) | |
# Get cell value | |
cell = ws.cell(row=row, column=col) | |
cell_value = "" | |
if cell.value is not None: | |
# Format numbers with thousands separators | |
if isinstance(cell.value, (int, float)) and not isinstance(cell.value, bool): | |
# Format with thousands separator and maintain decimal places for floats | |
if isinstance(cell.value, int): | |
cell_value = f"{cell.value:,}" | |
else: | |
# For floats, maintain up to 2 decimal places if needed | |
decimal_part = cell.value - int(cell.value) | |
if decimal_part == 0: | |
cell_value = f"{int(cell.value):,}" | |
else: | |
cell_value = f"{cell.value:,.2f}".rstrip('0').rstrip('.') if '.' in f"{cell.value:,.2f}" else f"{cell.value:,}" | |
else: | |
cell_value = str(cell.value) | |
# Get background color | |
bg_color = 'white' | |
if cell.fill.start_color and cell.fill.start_color.type == 'rgb': | |
rgb = cell.fill.start_color.rgb | |
if rgb and rgb != '00000000': | |
# Convert ARGB to RGB | |
if len(rgb) == 8: | |
rgb = rgb[2:] # Remove alpha channel | |
r = int(rgb[0:2], 16) | |
g = int(rgb[2:4], 16) | |
b = int(rgb[4:6], 16) | |
bg_color = (r, g, b) | |
# Fill cell with background color | |
draw.rectangle([x1+1, y1+1, x2-1, y2-1], fill=bg_color) | |
# Draw text | |
text_color = 'black' | |
if cell.font and cell.font.color: | |
if cell.font.color.type == 'rgb': | |
rgb = cell.font.color.rgb | |
if rgb and rgb != '00000000': | |
if len(rgb) == 8: | |
rgb = rgb[2:] | |
r = int(rgb[0:2], 16) | |
g = int(rgb[2:4], 16) | |
b = int(rgb[4:6], 16) | |
text_color = (r, g, b) | |
# Wrap text instead of truncating | |
wrapped_text = self._wrap_text(cell_value, cell_width - 6, font) | |
# Calculate vertical position to center text | |
text_height = len(wrapped_text) * (font.getbbox("A")[3] + 2) | |
y_offset = max(0, (cell_height - text_height) // 2) | |
# Draw each line of wrapped text | |
for i, line in enumerate(wrapped_text): | |
line_y = y1 + y_offset + i * (font.getbbox("A")[3] + 2) | |
draw.text((x1 + 3, line_y), line, fill=text_color, font=font) | |
# For better quality, consider using antialiasing if we're downscaling | |
if img_width > max_width or img_height > max_height: | |
orig_width, orig_height = img.size | |
aspect_ratio = orig_width / orig_height | |
if orig_width > max_width: | |
new_width = max_width | |
new_height = int(new_width / aspect_ratio) | |
else: | |
new_height = max_height | |
new_width = int(new_height * aspect_ratio) | |
# Resize with antialiasing | |
img = img.resize((new_width, new_height), Image.LANCZOS) | |
# Save the image with high quality | |
if image_format.upper() == 'JPEG': | |
img.save(output_file, format=image_format, quality=95) | |
elif image_format.upper() == 'PNG': | |
img.save(output_file, format=image_format, compress_level=1) # Lower compression for better quality | |
else: | |
img.save(output_file, format=image_format) | |
print(f"Image saved as {output_file} using custom renderer") | |
return output_file | |
except Exception as e: | |
print(f"Error in custom rendering: {str(e)}") | |
return None | |
def _convert_pdf_to_image(self, pdf_file, output_file, image_format='PNG', dpi=300): | |
""" | |
Helper method to convert PDF to image using available libraries | |
""" | |
# Try different methods to convert PDF to image | |
# Method 1: Try using wand (ImageMagick binding) if available | |
try: | |
from wand.image import Image as WandImage | |
with WandImage(filename=pdf_file, resolution=dpi) as img: | |
img.format = image_format | |
img.save(filename=output_file) | |
print(f"Image saved as {output_file} using Wand/ImageMagick") | |
return output_file | |
except ImportError: | |
# Method 2: Try using ImageMagick command-line if available | |
try: | |
import subprocess | |
cmd = ['convert', '-density', str(dpi), pdf_file, output_file] | |
subprocess.run(cmd, check=True) | |
print(f"Image saved as {output_file} using ImageMagick CLI") | |
return output_file | |
except (subprocess.SubprocessError, FileNotFoundError): | |
# Method 3: Try using PyMuPDF (fitz) if available | |
try: | |
import fitz # PyMuPDF | |
pdf_document = fitz.open(pdf_file) | |
page = pdf_document[0] # Get the first page | |
# Set resolution factor | |
zoom_factor = dpi / 72 # 72 is the base DPI for PDF | |
mat = fitz.Matrix(zoom_factor, zoom_factor) | |
# Render page to a pixmap | |
pix = page.get_pixmap(matrix=mat) | |
pix.save(output_file) | |
pdf_document.close() | |
print(f"Image saved as {output_file} using PyMuPDF") | |
return output_file | |
except ImportError: | |
print("Could not convert PDF to image. Please install one of these packages:") | |
print("1. Wand: pip install wand (requires ImageMagick)") | |
print("2. PyMuPDF: pip install pymupdf") | |
print("3. ImageMagick CLI: https://imagemagick.org/") | |
return None | |
return None | |
def convert(self, output_file=None, sheet_name=None, image_format='PNG', dpi=300): | |
""" | |
Convert Excel to image using the best available method for the current platform | |
Parameters: | |
output_file (str, optional): Path to save the output image | |
sheet_name (str, optional): Name of the sheet to convert | |
image_format (str): Image format (PNG, JPEG, etc.) | |
dpi (int): Image resolution in DPI | |
Returns: | |
str: Path to the generated image | |
""" | |
# Determine output file if not provided | |
if not output_file: | |
output_file = os.path.splitext(self.excel_file)[0] + '.' + image_format.lower() | |
# Try Windows COM method first if on Windows | |
if self.system == 'Windows': | |
result = self.convert_with_win32com(output_file, sheet_name, image_format, dpi) | |
if result: | |
return result | |
# Fall back to custom renderer | |
return self.convert_with_custom_renderer(output_file, sheet_name, image_format) | |
# Example usage | |
if __name__ == "__main__": | |
converter = ExcelToImage("sample_file.xlsx") | |
# Method 1: Let the converter choose the best method | |
result = converter.convert(output_file="output_image.png") | |
# Method 2: Force Windows COM method (Windows only) | |
# result = converter.convert_with_win32com("output_image.png") | |
# Method 3: Force custom renderer (cross-platform, no dependencies) | |
# result = converter.convert_with_custom_renderer("output_image.png") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
openpyxl | |
pillow |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment