dublado · May 29, 2024 16:26
diff --git a/extract_image_from_pdf.py b/extract_image_from_pdf.py
 import os
 from zipfile import ZipFile
 import xml.etree.ElementTree as ET
 from PIL import Image

 # Input archive and the working folders used by the rest of the script.
 pptx_path = "pptx.pptx"
 extracted_path = "pptx_extracted"
 output_dir = "output"

 # Open the presentation as a zip archive and unpack all of its members.
 with ZipFile(pptx_path, 'r') as archive:
     archive.extractall(extracted_path)
 # Function to extract images from ppt/media folder
 def extract_images_from_pptx(extracted_path, output_dir):
     """Copy the raster images of an unpacked .pptx into ``output_dir``.

     Looks in ``<extracted_path>/ppt/media`` and re-saves every ``.png``,
     ``.jpg`` and ``.jpeg`` file (extension matched case-insensitively) under
     the same file name in ``output_dir``.

     Args:
         extracted_path: Folder the presentation archive was unpacked into.
         output_dir: Destination folder; created if it does not exist.

     Returns:
         None. A missing media folder is not an error; nothing is copied.
     """
     media_path = os.path.join(extracted_path, "ppt", "media")

     # Create the destination up front so saving (and any later listing of
     # the folder by the caller) works on a fresh run.
     os.makedirs(output_dir, exist_ok=True)

     if not os.path.isdir(media_path):
         return

     # Sorted so files are processed in a reproducible order.
     for media_file in sorted(os.listdir(media_path)):
         extension = os.path.splitext(media_file)[1].lower()
         if extension not in (".png", ".jpeg", ".jpg"):
             continue

         # JPEG has no alpha channel and Pillow refuses to write an RGBA
         # image as JPEG, so only PNG output is converted to RGBA.
         target_mode = "RGBA" if extension == ".png" else "RGB"

         # The context manager closes the source file handle after saving.
         with Image.open(os.path.join(media_path, media_file)) as img:
             img.convert(target_mode).save(os.path.join(output_dir, media_file))

 # Extract images. The destination folder is created first: saving into it
 # and listing it below both fail with FileNotFoundError if it is missing.
 os.makedirs(output_dir, exist_ok=True)
 extract_images_from_pptx(extracted_path, output_dir)

 # List the extracted images
 extracted_images = os.listdir(output_dir)
	import os
	from zipfile import ZipFile
	import xml.etree.ElementTree as ET
	from PIL import Image

	# Input archive and the working folders used by the rest of the script.
	pptx_path = "pptx.pptx"
	extracted_path = "pptx_extracted"
	output_dir = "output"

	# Unzip the pptx file to explore its content. The body of the ``with``
	# statement must be indented, otherwise the script does not parse.
	with ZipFile(pptx_path, 'r') as zip_ref:
	    zip_ref.extractall(extracted_path)
	# Function to extract images from ppt/media folder
	def extract_images_from_pptx(extracted_path, output_dir):
	    """Re-save the images found in an unpacked presentation.

	    Scans ``<extracted_path>/ppt/media`` for files whose extension is
	    ``.png``, ``.jpg`` or ``.jpeg`` (any letter case) and writes each one
	    to ``output_dir`` under its original file name.

	    Args:
	        extracted_path: Folder the presentation archive was unpacked into.
	        output_dir: Destination folder; created if it does not exist.

	    Returns:
	        None. If the media folder is absent nothing is written.
	    """
	    media_path = os.path.join(extracted_path, "ppt", "media")

	    # Make sure the destination exists before anything is saved into it.
	    os.makedirs(output_dir, exist_ok=True)

	    if not os.path.isdir(media_path):
	        return

	    for media_file in sorted(os.listdir(media_path)):
	        suffix = os.path.splitext(media_file)[1].lower()
	        if suffix not in (".png", ".jpeg", ".jpg"):
	            continue

	        # Pillow cannot write an RGBA image as JPEG (the format has no
	        # alpha channel), so transparency is only requested for PNG.
	        mode = "RGBA" if suffix == ".png" else "RGB"

	        # Closing the source image releases its file handle promptly.
	        with Image.open(os.path.join(media_path, media_file)) as img:
	            img.convert(mode).save(os.path.join(output_dir, media_file))

	# Extract images. Create the destination folder first so that saving
	# into it and listing it afterwards cannot fail with FileNotFoundError.
	os.makedirs(output_dir, exist_ok=True)
	extract_images_from_pptx(extracted_path, output_dir)

	# List the extracted images
	extracted_images = os.listdir(output_dir)