Skip to content

Instantly share code, notes, and snippets.

@bobir01
Created March 19, 2025 07:55
Show Gist options
  • Save bobir01/3d34e2622ab2aed1e09c0d30fc49a0f6 to your computer and use it in GitHub Desktop.
Save bobir01/3d34e2622ab2aed1e09c0d30fc49a0f6 to your computer and use it in GitHub Desktop.
A utility class to convert Excel files directly to images without using PDF as an intermediary Works across platforms without requiring additional dependencies like Poppler
import os
import platform
import tempfile
from PIL import Image, ImageDraw, ImageFont
import openpyxl
from openpyxl.utils import get_column_letter
class ExcelToImage:
"""
A utility class to convert Excel files directly to images without using PDF as an intermediary
Works across platforms without requiring additional dependencies like Poppler
"""
def __init__(self, excel_file):
"""
Initialize with Excel file path
Parameters:
excel_file (str): Path to the Excel file
"""
self.excel_file = excel_file
self.system = platform.system()
def convert_with_win32com(self, output_file, sheet_name=None, image_format='PNG', dpi=300):
"""
Convert Excel to image using Windows COM interface (Windows only)
Parameters:
output_file (str): Path to save the output image
sheet_name (str, optional): Name of the sheet to convert
image_format (str): Image format (PNG, JPEG, etc.)
dpi (int): Image resolution in DPI
Returns:
str: Path to the generated image
"""
if self.system != 'Windows':
print("This method only works on Windows")
return None
try:
import win32com.client
# Get absolute path
abs_path = os.path.abspath(self.excel_file)
# Get Excel application
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
excel.DisplayAlerts = False
# Open workbook
wb = excel.Workbooks.Open(abs_path)
# Select sheet
if sheet_name:
try:
ws = wb.Sheets(sheet_name)
except:
print(f"Sheet '{sheet_name}' not found, using active sheet")
ws = wb.ActiveSheet
else:
ws = wb.ActiveSheet
# Ensure output directory exists
output_dir = os.path.dirname(output_file)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
# Export format constants
xlTypePDF = 0
xlQualityStandard = 0
# Create a temporary PDF file
temp_pdf = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False).name
# Export as PDF first (Excel doesn't directly support image export)
ws.ExportAsFixedFormat(
Type=xlTypePDF,
Filename=temp_pdf,
Quality=xlQualityStandard,
IncludeDocProperties=True,
IgnorePrintAreas=False,
OpenAfterPublish=False
)
# Close Excel
wb.Close(False)
excel.Quit()
# Now convert PDF to image using ImageMagick, Wand or PyMuPDF
result = self._convert_pdf_to_image(temp_pdf, output_file, image_format, dpi)
# Remove temporary PDF
if os.path.exists(temp_pdf):
os.remove(temp_pdf)
return result
except Exception as e:
print(f"Error in win32com conversion: {str(e)}")
return None
def _wrap_text(self, text, max_width, font):
"""
Wrap text to fit within the given width
Parameters:
text (str): Text to wrap
max_width (int): Maximum width in pixels
font (ImageFont): Font to use for text measurements
Returns:
list: List of wrapped text lines
"""
if not text:
return [""]
# Convert to string if not already
text = str(text)
# Split text into words
words = text.split()
if not words:
return [""]
# Start with the first word
lines = [words[0]]
current_line = 0
for word in words[1:]:
# Calculate width of current line + new word
test_line = lines[current_line] + " " + word
text_width = font.getlength(test_line)
if text_width <= max_width:
# Word fits on current line
lines[current_line] = test_line
else:
# Word doesn't fit, start a new line
lines.append(word)
current_line += 1
return lines
def convert_with_custom_renderer(self, output_file, sheet_name=None, image_format='PNG', max_width=3840, max_height=2160, scale_factor=2.0):
"""
Convert Excel to image using a custom pure Python renderer
Works on all platforms without external dependencies
Parameters:
output_file (str): Path to save the output image
sheet_name (str, optional): Name of the sheet to convert
image_format (str): Image format (PNG, JPEG, etc.)
max_width/max_height (int): Maximum dimensions for the output image
scale_factor (float): Scale factor for improving resolution (higher = better quality)
Returns:
str: Path to the generated image
"""
try:
# Load workbook
wb = openpyxl.load_workbook(self.excel_file)
# Select sheet
if sheet_name and sheet_name in wb.sheetnames:
ws = wb[sheet_name]
else:
ws = wb.active
# Determine dimensions
min_row, min_col = 1, 1
max_row = ws.max_row
max_col = ws.max_column
# Apply scale factor for better quality
base_cell_width = 100 # Base width in pixels
base_cell_height = 20 # Base height in pixels
# Scale dimensions for better quality
cell_width = int(base_cell_width * scale_factor)
cell_height = int(base_cell_height * scale_factor)
header_height = cell_height * 2 # Make header row twice as tall
# Calculate image dimensions
img_width = min(max_width, cell_width * max_col)
# Account for taller header row in height calculation
img_height = header_height + cell_height * (max_row - 1)
img_height = min(max_height, img_height)
# Create a new image with white background
img = Image.new('RGB', (img_width, img_height), color='white')
draw = ImageDraw.Draw(img)
# Try to load a font with larger size for better quality
font = None
font_size = int(10 * scale_factor) # Scale font size
try:
font = ImageFont.truetype("arial.ttf", font_size)
except:
try:
font = ImageFont.truetype("DejaVuSans.ttf", font_size)
except:
try:
# Try system fonts
if self.system == 'Windows':
font = ImageFont.truetype("C:\\Windows\\Fonts\\Arial.ttf", font_size)
elif self.system == 'Darwin': # macOS
font = ImageFont.truetype("/Library/Fonts/Arial.ttf", font_size)
else: # Linux
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)
except:
font = ImageFont.load_default()
# Draw grid and content
for row in range(min_row, max_row + 1):
# Use taller height for header row
row_height = header_height if row == 1 else cell_height
for col in range(min_col, max_col + 1):
# Get cell position
x1 = (col - min_col) * cell_width
# Calculate y-position based on previous rows
y1 = 0
for r in range(min_row, row):
y1 += header_height if r == 1 else cell_height
x2 = x1 + cell_width
y2 = y1 + (header_height if row == 1 else cell_height)
# Draw cell borders with thicker lines for better visibility
border_width = max(1, int(scale_factor / 2)) # Scale border width
draw.rectangle([x1, y1, x2, y2], outline='gray', width=border_width)
# Get cell value
cell = ws.cell(row=row, column=col)
cell_value = ""
if cell.value is not None:
# Format numbers with thousands separators
if isinstance(cell.value, (int, float)) and not isinstance(cell.value, bool):
# Format with thousands separator and maintain decimal places for floats
if isinstance(cell.value, int):
cell_value = f"{cell.value:,}"
else:
# For floats, maintain up to 2 decimal places if needed
decimal_part = cell.value - int(cell.value)
if decimal_part == 0:
cell_value = f"{int(cell.value):,}"
else:
cell_value = f"{cell.value:,.2f}".rstrip('0').rstrip('.') if '.' in f"{cell.value:,.2f}" else f"{cell.value:,}"
else:
cell_value = str(cell.value)
# Get background color
bg_color = 'white'
if cell.fill.start_color and cell.fill.start_color.type == 'rgb':
rgb = cell.fill.start_color.rgb
if rgb and rgb != '00000000':
# Convert ARGB to RGB
if len(rgb) == 8:
rgb = rgb[2:] # Remove alpha channel
r = int(rgb[0:2], 16)
g = int(rgb[2:4], 16)
b = int(rgb[4:6], 16)
bg_color = (r, g, b)
# Fill cell with background color
draw.rectangle([x1+1, y1+1, x2-1, y2-1], fill=bg_color)
# Draw text
text_color = 'black'
if cell.font and cell.font.color:
if cell.font.color.type == 'rgb':
rgb = cell.font.color.rgb
if rgb and rgb != '00000000':
if len(rgb) == 8:
rgb = rgb[2:]
r = int(rgb[0:2], 16)
g = int(rgb[2:4], 16)
b = int(rgb[4:6], 16)
text_color = (r, g, b)
# Wrap text instead of truncating
wrapped_text = self._wrap_text(cell_value, cell_width - 6, font)
# Calculate vertical position to center text
text_height = len(wrapped_text) * (font.getbbox("A")[3] + 2)
y_offset = max(0, (cell_height - text_height) // 2)
# Draw each line of wrapped text
for i, line in enumerate(wrapped_text):
line_y = y1 + y_offset + i * (font.getbbox("A")[3] + 2)
draw.text((x1 + 3, line_y), line, fill=text_color, font=font)
# For better quality, consider using antialiasing if we're downscaling
if img_width > max_width or img_height > max_height:
orig_width, orig_height = img.size
aspect_ratio = orig_width / orig_height
if orig_width > max_width:
new_width = max_width
new_height = int(new_width / aspect_ratio)
else:
new_height = max_height
new_width = int(new_height * aspect_ratio)
# Resize with antialiasing
img = img.resize((new_width, new_height), Image.LANCZOS)
# Save the image with high quality
if image_format.upper() == 'JPEG':
img.save(output_file, format=image_format, quality=95)
elif image_format.upper() == 'PNG':
img.save(output_file, format=image_format, compress_level=1) # Lower compression for better quality
else:
img.save(output_file, format=image_format)
print(f"Image saved as {output_file} using custom renderer")
return output_file
except Exception as e:
print(f"Error in custom rendering: {str(e)}")
return None
def _convert_pdf_to_image(self, pdf_file, output_file, image_format='PNG', dpi=300):
"""
Helper method to convert PDF to image using available libraries
"""
# Try different methods to convert PDF to image
# Method 1: Try using wand (ImageMagick binding) if available
try:
from wand.image import Image as WandImage
with WandImage(filename=pdf_file, resolution=dpi) as img:
img.format = image_format
img.save(filename=output_file)
print(f"Image saved as {output_file} using Wand/ImageMagick")
return output_file
except ImportError:
# Method 2: Try using ImageMagick command-line if available
try:
import subprocess
cmd = ['convert', '-density', str(dpi), pdf_file, output_file]
subprocess.run(cmd, check=True)
print(f"Image saved as {output_file} using ImageMagick CLI")
return output_file
except (subprocess.SubprocessError, FileNotFoundError):
# Method 3: Try using PyMuPDF (fitz) if available
try:
import fitz # PyMuPDF
pdf_document = fitz.open(pdf_file)
page = pdf_document[0] # Get the first page
# Set resolution factor
zoom_factor = dpi / 72 # 72 is the base DPI for PDF
mat = fitz.Matrix(zoom_factor, zoom_factor)
# Render page to a pixmap
pix = page.get_pixmap(matrix=mat)
pix.save(output_file)
pdf_document.close()
print(f"Image saved as {output_file} using PyMuPDF")
return output_file
except ImportError:
print("Could not convert PDF to image. Please install one of these packages:")
print("1. Wand: pip install wand (requires ImageMagick)")
print("2. PyMuPDF: pip install pymupdf")
print("3. ImageMagick CLI: https://imagemagick.org/")
return None
return None
def convert(self, output_file=None, sheet_name=None, image_format='PNG', dpi=300):
"""
Convert Excel to image using the best available method for the current platform
Parameters:
output_file (str, optional): Path to save the output image
sheet_name (str, optional): Name of the sheet to convert
image_format (str): Image format (PNG, JPEG, etc.)
dpi (int): Image resolution in DPI
Returns:
str: Path to the generated image
"""
# Determine output file if not provided
if not output_file:
output_file = os.path.splitext(self.excel_file)[0] + '.' + image_format.lower()
# Try Windows COM method first if on Windows
if self.system == 'Windows':
result = self.convert_with_win32com(output_file, sheet_name, image_format, dpi)
if result:
return result
# Fall back to custom renderer
return self.convert_with_custom_renderer(output_file, sheet_name, image_format)
# Example usage
if __name__ == "__main__":
converter = ExcelToImage("sample_file.xlsx")
# Method 1: Let the converter choose the best method
result = converter.convert(output_file="output_image.png")
# Method 2: Force Windows COM method (Windows only)
# result = converter.convert_with_win32com("output_image.png")
# Method 3: Force custom renderer (cross-platform, no dependencies)
# result = converter.convert_with_custom_renderer("output_image.png")
openpyxl
pillow
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment