Wolfenswan · April 25, 2019 15:22
diff --git a/image_processor.py b/image_processor.py
 """
 Bulk image processor + pdf creator

 I had to digitize, sort and process a large amount of archival material page by page. Unfortunately, all pages only existed as single .jpgs.
 I wrote this script to automate the tedious process of renaming + resizing the images, as well as putting them in a single pdf.

 It will:
 - process all folders in its current directory
 - create a backup of the original images
 - rename all images according to the name of the directory containing them
 - resize all images (atm. to dpi150 standards), maintaining ratio
 - create a pdf containing all images

 It is currently very much tailored to a specific job but, should the need arise, it can be made more dynamic and possibly cater to user input to some extent.

 """

 import os
 import shutil
 from reportlab.pdfgen.canvas import Canvas
 from PIL import Image, ExifTags

 # Name of the folder (created inside the root directory) that receives the
 # untouched copies; process_folders skips every directory starting with it.
 BACKUP_DIR_NAME = '_originale'
 # Passed as the `quality` argument when scale_image re-saves an image.
 JPG_COMPRESSION = 80

 def process_folders(root_dir, backup_dir):
 	"""Walk root_dir top-down and pass every sub-directory to process_files.

 	Directories whose name starts with BACKUP_DIR_NAME are pruned from the
 	walk, and root_dir itself is never processed.

 	Args:
 		root_dir: Directory whose sub-directories are processed.
 		backup_dir: Accepted for the caller's convenience; not used here.
 	"""
 	walker = os.walk(root_dir, topdown=True)
 	for current, subdirs, names in walker:
 		# Prune in place so os.walk never descends into the backup tree.
 		kept = []
 		for sub in subdirs:
 			if sub.startswith(BACKUP_DIR_NAME):
 				continue
 			kept.append(sub)
 		subdirs[:] = kept
 		if current == root_dir:
 			continue
 		process_files(current, names)

 def process_files(dir, fileList, root_dir=None):
 	"""Back up one directory, then rename and rescale its images and bundle them into a PDF.

 	A directory that already contains a PDF is treated as processed and left
 	alone. Otherwise the directory is copied to
 	<root_dir>/<BACKUP_DIR_NAME>/<directory name> (unless that copy already
 	exists), every .jpg/.jpeg/.png file is renamed to
 	<directory name>_<index><extension>, rescaled in place by scale_image and
 	added to <directory name>.pdf as a page of exactly the image's size.

 	Args:
 		dir: Path of the directory to process.
 		fileList: Names of the files inside dir.
 		root_dir: Directory that holds the backup folder. Defaults to the
 			current working directory, which is what the script entry point uses.
 	"""
 	image_types = ('.jpg', '.jpeg', '.png')
 	dir_name = os.path.basename(dir)
 	print(f'Accessing {dir_name}, containing {len(fileList)} files.')

 	# Extension checks are case-insensitive so '.Pdf' or '.JPEG' are recognised too.
 	if any(name.lower().endswith('.pdf') for name in fileList):
 		print(f'PDF found, ignoring {dir_name}...')
 		return

 	if root_dir is None:
 		root_dir = os.getcwd()
 	backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME, dir_name)
 	# Create a backup if necessary
 	if not os.path.isdir(backup_dir):
 		print(f'Creating backup of {dir_name}')
 		shutil.copytree(dir, backup_dir)

 	pdf_path = os.path.join(dir, f'{dir_name}.pdf')
 	pdf = Canvas(pdf_path)
 	pages = 0

 	# Sort so the page order does not depend on the order the OS lists files in.
 	# The sort is lexicographic (_1, _10, _2, ...). The index counts every
 	# file, so non-image files leave gaps in the numbering.
 	for i, fname in enumerate(sorted(fileList)):
 		file_type = os.path.splitext(fname)[1]
 		file_path = os.path.join(dir, fname)
 		new_name = f'{dir_name}_{i}{file_type}'
 		new_file = os.path.join(dir, new_name)
 		if os.path.isfile(new_file):
 			# Never overwrite an existing file; the image is skipped instead.
 			print('ERROR: file exists')
 			continue
 		if file_type.lower() not in image_types:
 			continue
 		print(f'Renaming {fname} to {new_name}')
 		os.rename(file_path, new_file)
 		image = scale_image(new_file)
 		width, height = image.size
 		pdf.setPageSize((width, height))
 		pdf.drawImage(new_file, 0, 0, width, height, preserveAspectRatio=True)
 		pdf.showPage()
 		pages += 1

 	if not pages:
 		# Writing a PDF without pages would mark the folder as processed for good.
 		print(f'No images found in {dir_name}, not writing a pdf.')
 		return
 	print('Writing pdf (might take a while...)')
 	pdf.save()

 def scale_image(image_path):
 	"""Apply the EXIF rotation, fit the image into the page box and re-save it in place.

 	The bounding box is 1754x1240 px for landscape images and 1240x1754 px for
 	portrait ones (the Dpi150 values); the aspect ratio is kept. The file at
 	image_path is overwritten, using JPG_COMPRESSION as the save quality.

 	Args:
 		image_path: Path of the image file to rescale.

 	Returns:
 		The PIL image object that was written back to image_path.
 	"""
 	image = Image.open(image_path)

 	# _getexif is not available on every image class and yields None when the
 	# file carries no EXIF block, so fall back to an empty mapping.
 	read_exif = getattr(image, '_getexif', None)
 	exif_data_raw = (read_exif() if callable(read_exif) else None) or {}
 	exif_data = {ExifTags.TAGS.get(tag, tag): value for tag, value in exif_data_raw.items()}

 	if exif_data.get('Orientation') == 6:
 		image = image.rotate(270, expand=True)
 	# more orientations can be added as required, see: https://www.impulseadventure.com/photo/exif-orientation.html

 	# Pick the box after rotating, so it matches the orientation that is saved.
 	w, h = image.size
 	size = (1754, 1240) if w > h else (1240, 1754)  # Dpi150
 	# size = (842, 595) if w > h else (595, 842)  # Dpi72

 	# LANCZOS is the filter formerly exposed as Image.ANTIALIAS (removed in Pillow 10).
 	image.thumbnail(size, Image.LANCZOS)
 	image.save(image_path, optimize=True, quality=JPG_COMPRESSION)
 	return image

 if __name__ == '__main__':
 	# Work on the directory the script is started from. root_dir is assigned at
 	# module level, so it is visible to the functions above as a global.
 	root_dir = os.getcwd()
 	backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME)
 	# Create the backup root as required; a no-op when it already exists.
 	os.makedirs(backup_dir, exist_ok=True)
 	process_folders(root_dir, backup_dir)
	"""
	Bulk image processor + pdf creator

	I had to digitize, sort and process a large amount of archival material page by page. Unfortunately, all pages only existed as single .jpgs.
	I wrote this script to automate the tedious process of renaming + resizing the images, as well as putting them in a single pdf.

	It will:
	- process all folders in its current directory
	- create a backup of the original images
	- rename all images according to the name of the directory containing them
	- resize all images (atm. to dpi150 standards), maintaining ratio
	- create a pdf containing all images

	It is currently very much tailored to a specific job but, should the need arise, it can be made more dynamic and possibly cater to user input to some extent.

	"""

	import os
	import shutil
	from reportlab.pdfgen.canvas import Canvas
	from PIL import Image, ExifTags

	# Name of the folder (created inside the root directory) that receives the
	# untouched copies; process_folders skips every directory starting with it.
	BACKUP_DIR_NAME = '_originale'
	# Passed as the `quality` argument when scale_image re-saves an image.
	JPG_COMPRESSION = 80

	def process_folders(root_dir, backup_dir):
		"""Walk root_dir top-down and pass every sub-directory to process_files.

		Directories whose name starts with BACKUP_DIR_NAME are pruned from the
		walk, and root_dir itself is never processed.

		Args:
			root_dir: Directory whose sub-directories are processed.
			backup_dir: Accepted for the caller's convenience; not used here.
		"""
		for current, subdirs, names in os.walk(root_dir, topdown=True):
			# Prune in place so os.walk never descends into the backup tree.
			subdirs[:] = [sub for sub in subdirs if not sub.startswith(BACKUP_DIR_NAME)]
			if current != root_dir:
				process_files(current, names)

	def process_files(dir, fileList, root_dir=None):
		"""Back up one directory, then rename and rescale its images and bundle them into a PDF.

		A directory that already contains a PDF is treated as processed and left
		alone. Otherwise the directory is copied to
		<root_dir>/<BACKUP_DIR_NAME>/<directory name> (unless that copy already
		exists), every .jpg/.jpeg/.png file is renamed to
		<directory name>_<index><extension>, rescaled in place by scale_image and
		added to <directory name>.pdf as a page of exactly the image's size.

		Args:
			dir: Path of the directory to process.
			fileList: Names of the files inside dir.
			root_dir: Directory that holds the backup folder. Defaults to the
				current working directory, which is what the script entry point uses.
		"""
		image_types = ('.jpg', '.jpeg', '.png')
		dir_name = os.path.basename(dir)
		print(f'Accessing {dir_name}, containing {len(fileList)} files.')

		# Extension checks are case-insensitive so '.Pdf' or '.JPEG' are recognised too.
		if any(name.lower().endswith('.pdf') for name in fileList):
			print(f'PDF found, ignoring {dir_name}...')
			return

		if root_dir is None:
			root_dir = os.getcwd()
		backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME, dir_name)
		# Create a backup if necessary
		if not os.path.isdir(backup_dir):
			print(f'Creating backup of {dir_name}')
			shutil.copytree(dir, backup_dir)

		pdf_path = os.path.join(dir, f'{dir_name}.pdf')
		pdf = Canvas(pdf_path)
		pages = 0

		# Sort so the page order does not depend on the order the OS lists files in.
		# The sort is lexicographic (_1, _10, _2, ...). The index counts every
		# file, so non-image files leave gaps in the numbering.
		for i, fname in enumerate(sorted(fileList)):
			file_type = os.path.splitext(fname)[1]
			file_path = os.path.join(dir, fname)
			new_name = f'{dir_name}_{i}{file_type}'
			new_file = os.path.join(dir, new_name)
			if os.path.isfile(new_file):
				# Never overwrite an existing file; the image is skipped instead.
				print('ERROR: file exists')
				continue
			if file_type.lower() not in image_types:
				continue
			print(f'Renaming {fname} to {new_name}')
			os.rename(file_path, new_file)
			image = scale_image(new_file)
			width, height = image.size
			pdf.setPageSize((width, height))
			pdf.drawImage(new_file, 0, 0, width, height, preserveAspectRatio=True)
			pdf.showPage()
			pages += 1

		if not pages:
			# Writing a PDF without pages would mark the folder as processed for good.
			print(f'No images found in {dir_name}, not writing a pdf.')
			return
		print('Writing pdf (might take a while...)')
		pdf.save()

	def scale_image(image_path):
		"""Apply the EXIF rotation, fit the image into the page box and re-save it in place.

		The bounding box is 1754x1240 px for landscape images and 1240x1754 px for
		portrait ones (the Dpi150 values); the aspect ratio is kept. The file at
		image_path is overwritten, using JPG_COMPRESSION as the save quality.

		Args:
			image_path: Path of the image file to rescale.

		Returns:
			The PIL image object that was written back to image_path.
		"""
		image = Image.open(image_path)

		# _getexif is not available on every image class and yields None when the
		# file carries no EXIF block, so fall back to an empty mapping.
		read_exif = getattr(image, '_getexif', None)
		exif_data_raw = (read_exif() if callable(read_exif) else None) or {}
		exif_data = {ExifTags.TAGS.get(tag, tag): value for tag, value in exif_data_raw.items()}

		if exif_data.get('Orientation') == 6:
			image = image.rotate(270, expand=True)
		# more orientations can be added as required, see: https://www.impulseadventure.com/photo/exif-orientation.html

		# Pick the box after rotating, so it matches the orientation that is saved.
		w, h = image.size
		size = (1754, 1240) if w > h else (1240, 1754)  # Dpi150
		# size = (842, 595) if w > h else (595, 842)  # Dpi72

		# LANCZOS is the filter formerly exposed as Image.ANTIALIAS (removed in Pillow 10).
		image.thumbnail(size, Image.LANCZOS)
		image.save(image_path, optimize=True, quality=JPG_COMPRESSION)
		return image

	if __name__ == '__main__':
		# Work on the directory the script is started from. root_dir is assigned at
		# module level, so it is visible to the functions above as a global.
		root_dir = os.getcwd()
		backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME)
		# Create the backup root as required; a no-op when it already exists.
		os.makedirs(backup_dir, exist_ok=True)
		process_folders(root_dir, backup_dir)