florian-obradovic · August 20, 2024 09:34
diff --git a/embedd-images-into-html-base64.py b/embedd-images-into-html-base64.py
 # This Python script walks through all HTML files below a directory and replaces local image references (<img src="...">) with embedded base64-encoded data URIs.

 ## create a directory
 # mkdir ~/path2convert && cd ~/path2convert
 # git init # optional to revert changes
 # git add -A && git commit -m "Start" # optional to revert changes

 ## create a virtual environment
 # python3 -m venv ./
 # source ./bin/activate
 # pip3 install beautifulsoup4 pillow  # note: the script itself only imports beautifulsoup4 (bs4); pillow is not used by it
 ## Example: python3 embedd-images-into-html-base64.py

 import os
 import base64
 from bs4 import BeautifulSoup
 from urllib.parse import unquote

 # Directory containing your HTML files. A leading "~" is expanded to the
 # current user's home directory by os.path.expanduser(); adjust the path to
 # point at the folder you want to convert.
 html_files_directory = os.path.expanduser('~/Downloads/paname-convert')

 # Function to convert image to base64
 def img_to_base64(img_path):
    """Return the content of the file at *img_path* as a base64 text string.

    The file is read in binary mode and the encoded bytes are decoded to
    ``str`` so the result can be placed directly into a ``data:`` URI.
    """
    with open(img_path, 'rb') as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode('utf-8')

 # Function to process a single HTML file
 def process_html_file(file_path):
    """Embed the local images referenced by one HTML file as base64 data URIs.

    Every ``<img>`` tag whose ``src`` is not already a ``data:`` URI is
    URL-decoded and resolved relative to the directory of *file_path*. If it
    points to an existing regular file, the ``src`` is replaced by a
    ``data:<mime>;base64,<payload>`` URI. The HTML file is rewritten in
    place (UTF-8), but only when at least one image was actually embedded, so
    untouched files are not re-serialised by the parser.

    Args:
        file_path: Path of the HTML file to process.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import section.
    import mimetypes

    with open(file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'html.parser')

    base_dir = os.path.dirname(file_path)
    changed = False

    # Find all image tags
    for img in soup.find_all('img'):
        img_src = img.get('src')
        if not img_src or img_src.startswith('data:'):
            print(f"Skipping image {img_src} in {file_path} (already base64 or invalid)")
            continue

        # Decode the URL-encoded path and resolve it next to the HTML file.
        img_path = os.path.join(base_dir, unquote(img_src))

        # isfile() rather than exists(): a src resolving to a directory would
        # otherwise pass the check and crash when opened for reading.
        if not os.path.isfile(img_path):
            print(f"Image not found: {img_path}")
            continue

        # Use the standard MIME table so ".jpg" maps to "image/jpeg" and
        # ".svg" to "image/svg+xml"; the raw extension is only a fallback for
        # image formats the table does not know.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith('image/'):
            extension = os.path.splitext(img_path)[1].lstrip('.').lower()
            mime_type = f"image/{extension}" if extension else "application/octet-stream"

        base64_string = img_to_base64(img_path)
        img['src'] = f"data:{mime_type};base64,{base64_string}"
        changed = True
        print(f"Replaced image {img_src} with base64 in {file_path}")

    # Save the modified HTML back to the file (skipped when nothing changed).
    if changed:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(str(soup))

 # Function to process all HTML files in a directory
 def process_html_files_in_directory(directory):
    """Run process_html_file on every ``*.html`` file found below *directory*.

    The tree is traversed with os.walk(), so files in nested sub-directories
    are included as well.
    """
    for current_dir, _subdirs, filenames in os.walk(directory):
        html_names = [name for name in filenames if name.endswith('.html')]
        for name in html_names:
            process_html_file(os.path.join(current_dir, name))



 # Process all HTML files in the specified directory. os.walk() silently
 # yields nothing for a missing directory, so verify it exists first instead
 # of reporting success for a run that processed no files at all.
 if not os.path.isdir(html_files_directory):
    raise SystemExit(f"Directory not found: {html_files_directory}")

 process_html_files_in_directory(html_files_directory)

 print("All HTML files have been processed and images have been embedded as Base64.")
	# This Python script walks through all HTML files below a directory and replaces local image references (<img src="...">) with embedded base64-encoded data URIs.

	## create a directory
	# mkdir ~/path2convert && cd ~/path2convert
	# git init # optional to revert changes
	# git add -A && git commit -m "Start" # optional to revert changes

	## create a virtual environment
	# python3 -m venv ./
	# source ./bin/activate
	# pip3 install beautifulsoup4 pillow  # note: the script itself only imports beautifulsoup4 (bs4); pillow is not used by it
	## Example: python3 embedd-images-into-html-base64.py

	import os
	import base64
	from bs4 import BeautifulSoup
	from urllib.parse import unquote

	# Directory containing your HTML files. A leading "~" is expanded to the
	# current user's home directory by os.path.expanduser(); adjust the path to
	# point at the folder you want to convert.
	html_files_directory = os.path.expanduser('~/Downloads/paname-convert')

	# Function to convert image to base64
	def img_to_base64(img_path):
		"""Return the content of the file at *img_path* as a base64 text string.

		The file is read in binary mode; the encoded bytes are decoded to
		``str`` so the result can be placed directly into a ``data:`` URI.
		"""
		with open(img_path, 'rb') as img_file:
			encoded_string = base64.b64encode(img_file.read()).decode('utf-8')
		return encoded_string

	# Function to process a single HTML file
	def process_html_file(file_path):
		"""Embed the local images referenced by one HTML file as base64 data URIs.

		Every ``<img>`` tag whose ``src`` is not already a ``data:`` URI is
		URL-decoded and resolved relative to the directory of *file_path*. If
		it points to an existing regular file, the ``src`` is replaced by a
		``data:<mime>;base64,<payload>`` URI. The HTML file is rewritten in
		place (UTF-8), but only when at least one image was actually embedded.

		Args:
			file_path: Path of the HTML file to process.
		"""
		# Function-scope import so this block does not depend on the file's
		# top-level import section.
		import mimetypes

		with open(file_path, 'r', encoding='utf-8') as file:
			soup = BeautifulSoup(file, 'html.parser')

		base_dir = os.path.dirname(file_path)
		changed = False

		# Find all image tags
		for img in soup.find_all('img'):
			img_src = img.get('src')
			if not img_src or img_src.startswith('data:'):
				print(f"Skipping image {img_src} in {file_path} (already base64 or invalid)")
				continue

			# Decode the URL-encoded path and resolve it next to the HTML file.
			img_path = os.path.join(base_dir, unquote(img_src))

			# isfile() rather than exists(): a src resolving to a directory
			# would otherwise pass the check and crash when opened for reading.
			if not os.path.isfile(img_path):
				print(f"Image not found: {img_path}")
				continue

			# Use the standard MIME table so ".jpg" maps to "image/jpeg" and
			# ".svg" to "image/svg+xml"; the raw extension is only a fallback
			# for image formats the table does not know.
			mime_type, _ = mimetypes.guess_type(img_path)
			if not mime_type or not mime_type.startswith('image/'):
				extension = os.path.splitext(img_path)[1].lstrip('.').lower()
				mime_type = f"image/{extension}" if extension else "application/octet-stream"

			base64_string = img_to_base64(img_path)
			img['src'] = f"data:{mime_type};base64,{base64_string}"
			changed = True
			print(f"Replaced image {img_src} with base64 in {file_path}")

		# Save the modified HTML back to the file (skipped when nothing changed).
		if changed:
			with open(file_path, 'w', encoding='utf-8') as file:
				file.write(str(soup))

	# Function to process all HTML files in a directory
	def process_html_files_in_directory(directory):
		"""Run process_html_file on every ``*.html`` file found below *directory*.

		The tree is traversed with os.walk(), so files in nested
		sub-directories are included as well.
		"""
		for root, _dirs, files in os.walk(directory):
			for file in files:
				if file.endswith('.html'):
					process_html_file(os.path.join(root, file))



	# Process all HTML files in the specified directory. os.walk() silently
	# yields nothing for a missing directory, so verify it exists first instead
	# of reporting success for a run that processed no files at all.
	if not os.path.isdir(html_files_directory):
		raise SystemExit(f"Directory not found: {html_files_directory}")

	process_html_files_in_directory(html_files_directory)

	print("All HTML files have been processed and images have been embedded as Base64.")