flacle · January 14, 2022 00:38
diff --git a/extractImagesFromPPT.py b/extractImagesFromPPT.py
 # Author: Francis Laclé
 # 13/01/2022
 # Extract images from Powerpoint presentation files.
 # We assume PPT(X)'s are in subdirectories and we scan one level deep.
 # In case more depth is required, put a "**" component in the glob pattern inside main_func
 # and set recursive=True there; the flag has no effect on a pattern without "**".

 # Objective:
 # From the root directory, scan for PPT(X) files in sub directories (1-level)
 # Then, for each PPT(X) open it, extract the images and save them on the disk

 from glob import glob
 from pptx import Presentation
 from pptx.enum.shapes import MSO_SHAPE_TYPE

 def iter_picture_shapes(prs):
    """Lazily yield the picture shapes of a presentation, slide by slide.

    Only the shapes listed directly in each slide's ``shapes`` collection are
    examined; a shape is yielded when its ``shape_type`` equals
    ``MSO_SHAPE_TYPE.PICTURE``.

    Args:
        prs: presentation object exposing ``slides``, each with ``shapes``.

    Yields:
        Every matching shape, in slide order and then shape order.
    """
    for current_slide in prs.slides:
        yield from (
            candidate
            for candidate in current_slide.shapes
            if candidate.shape_type == MSO_SHAPE_TYPE.PICTURE
        )

 # Specify a "root" directory as the starting point
 def main_func(root_dir):
    """Extract the pictures of every presentation one level below ``root_dir``.

    Each immediate subdirectory of ``root_dir`` is searched for files matching
    ``*.ppt*``. Every shape yielded by ``iter_picture_shapes`` has its image
    bytes written into that same subdirectory as ``<n>_<image.filename>``.

    The counter ``n`` runs across all presentations of a subdirectory. It used
    to restart at 0 for each presentation, so a second presentation in the same
    folder overwrote the images extracted from the first one.

    Args:
        root_dir: path of the directory whose immediate subdirectories are
            scanned for presentations.
    """
    # from https://stackoverflow.com/a/36426997/861597
    # The trailing "/" restricts matches to directories and leaves each result
    # ending in a path separator, which the concatenations below rely on.
    sub_dirs = glob(root_dir + "/*/", recursive=False)
    for sub_dir in sub_dirs:
        # One counter per subdirectory keeps output names unique across decks.
        counter = 0
        # glob returns matches in arbitrary order; sorting makes the numbering
        # reproducible between runs.
        # NOTE(review): the pattern also matches legacy ".ppt" files; confirm
        # that Presentation() can open them or that none are present.
        for filename in sorted(glob(sub_dir + "*.ppt*")):
            # adapted from https://stackoverflow.com/a/52504408/861597
            for picture in iter_picture_shapes(Presentation(filename)):
                image = picture.image
                image_filename = sub_dir + str(counter) + '_' + image.filename
                with open(image_filename, 'wb') as f:
                    # ---image "file" contents---
                    f.write(image.blob)
                counter += 1

 if __name__ == "__main__":
    # Run the extraction only when this file is executed as a script, so that
    # importing the module does not start writing files.
    main_func('./some/dir/path/string')
	# Author: Francis Laclé
	# 13/01/2022
	# Extract images from Powerpoint presentation files.
	# We assume PPT(X)'s are in subdirectories and we scan one level deep.
	# In case more depth is required, put a "**" component in the glob pattern inside main_func
	# and set recursive=True there; the flag has no effect on a pattern without "**".

	# Objective:
	# From the root directory, scan for PPT(X) files in sub directories (1-level)
	# Then, for each PPT(X) open it, extract the images and save them on the disk

	from glob import glob
	from pptx import Presentation
	from pptx.enum.shapes import MSO_SHAPE_TYPE

	def iter_picture_shapes(prs):
	    """Yield each picture shape found on the slides of ``prs``.

	    The nesting of the loops was lost in this copy (every line sat at the
	    same indent), which is not valid Python; the structure is restored here.

	    Only the shapes listed directly in each slide's ``shapes`` collection
	    are examined; a shape is yielded when its ``shape_type`` equals
	    ``MSO_SHAPE_TYPE.PICTURE``.

	    Args:
	        prs: presentation object exposing ``slides``, each with ``shapes``.

	    Yields:
	        Every matching shape, in slide order and then shape order.
	    """
	    for slide in prs.slides:
	        for shape in slide.shapes:
	            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
	                yield shape

	# Specify a "root" directory as the starting point
	def main_func(root_dir):
	    """Extract the pictures of every presentation one level below ``root_dir``.

	    Each immediate subdirectory of ``root_dir`` is searched for files
	    matching ``*.ppt*``. Every shape yielded by ``iter_picture_shapes`` has
	    its image bytes written into that same subdirectory as
	    ``<n>_<image.filename>``.

	    Fixes relative to the mangled copy this replaces:
	    * the block nesting is restored (all lines sat at one indent level);
	    * the file pattern is ``"*.ppt*"`` again; it had been reduced to
	      ``".ppt"``, which only matches a file literally named ``.ppt``;
	    * the counter ``n`` runs across all presentations of a subdirectory, so
	      images from one presentation no longer overwrite another's.

	    Args:
	        root_dir: path of the directory whose immediate subdirectories are
	            scanned for presentations.
	    """
	    # from https://stackoverflow.com/a/36426997/861597
	    # The trailing "/" restricts matches to directories and leaves each
	    # result ending in a path separator, which the concatenations rely on.
	    sub_dirs = glob(root_dir + "/*/", recursive=False)
	    for sub_dir in sub_dirs:
	        # One counter per subdirectory keeps output names unique across decks.
	        counter = 0
	        # glob returns matches in arbitrary order; sorting makes the
	        # numbering reproducible between runs.
	        # NOTE(review): the pattern also matches legacy ".ppt" files; confirm
	        # that Presentation() can open them or that none are present.
	        for filename in sorted(glob(sub_dir + "*.ppt*")):
	            # adapted from https://stackoverflow.com/a/52504408/861597
	            for picture in iter_picture_shapes(Presentation(filename)):
	                image = picture.image
	                image_filename = sub_dir + str(counter) + '_' + image.filename
	                with open(image_filename, 'wb') as f:
	                    # ---image "file" contents---
	                    f.write(image.blob)
	                counter += 1

	if __name__ == "__main__":
	    # Run the extraction only when this file is executed as a script, so
	    # that importing the module does not start writing files.
	    main_func('./some/dir/path/string')