Last active
April 25, 2019 15:22
-
-
Save Wolfenswan/918e008ff8667af5287d10143f6e134d to your computer and use it in GitHub Desktop.
Bulk image processor + pdf creator using PIL & Reportlab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Bulk image processor + pdf creator
I had to digitize, sort and process a large amount of archival material page by page. Unfortunately, all pages only existed as single .jpgs.
I wrote this script to automate the tedious process of renaming + resizing the images, as well as putting them in a single pdf.
It will:
- process all folders in its current directory
- create a backup of the original images
- rename all images according to the name of the directory containing them
- resize all images (atm. to dpi150 standards), maintaining ratio
- create a pdf containing all images
It is currently very much tailored to a specific job but, should the need arise, it can be made more dynamic and possibly cater to user input to some extent.
""" | |
import os
import re
import shutil

from PIL import Image, ExifTags
from reportlab.pdfgen.canvas import Canvas
# Name of the folder below the root directory that holds the untouched originals.
BACKUP_DIR_NAME = '_originale'
# Value handed to PIL as ``quality`` when a resized image is written back to disk.
JPG_COMPRESSION = 80
# File extensions (compared in lower case) that are treated as images.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _natural_sort_key(name):
    """Return a sort key that orders embedded numbers by value (_2 before _10)."""
    parts = re.split(r'(\d+)', name)
    # With a capturing group, re.split puts the digit runs at the odd indices.
    return [int(part) if index % 2 else part.lower()
            for index, part in enumerate(parts)]


def process_folders(root_dir, backup_dir):
    """Walk ``root_dir`` and hand every folder below it to ``process_files``.

    Args:
        root_dir: Directory whose sub-folders are processed. Files lying
            directly in ``root_dir`` itself are not touched.
        backup_dir: Directory below which the original content of each
            processed folder is backed up.
    """
    backup_path = os.path.abspath(backup_dir)
    for current_dir, sub_dirs, file_names in os.walk(root_dir, topdown=True):
        # Prune in place so os.walk never descends into backup folders;
        # sorting makes the traversal order deterministic.
        sub_dirs[:] = sorted(
            d for d in sub_dirs
            if not d.startswith(BACKUP_DIR_NAME)
            and os.path.abspath(os.path.join(current_dir, d)) != backup_path
        )
        if current_dir != root_dir:
            process_files(current_dir, file_names, backup_dir)


def process_files(dir, fileList, backup_dir_root=None):
    """Back up, rename and resize the images of one folder and bundle them in a PDF.

    Folders that already contain a PDF, or that contain no images, are skipped.
    Images are processed in natural sort order and renamed to
    ``<folder>_<index><extension>``; the PDF is written as ``<folder>.pdf``
    into the same folder.

    Args:
        dir: Path of the folder to process.
        fileList: Names of the files inside ``dir``.
        backup_dir_root: Directory below which the backup copy of ``dir`` is
            created. Defaults to ``BACKUP_DIR_NAME`` inside the current
            working directory.
    """
    dir_name = os.path.basename(os.path.normpath(dir))
    print(f'Accessing {dir_name}, containing {len(fileList)} files.')

    extensions = [os.path.splitext(name)[1].lower() for name in fileList]
    if '.pdf' in extensions:
        print(f'PDF found, ignoring {dir_name}...')
        return

    image_names = sorted(
        (name for name, ext in zip(fileList, extensions)
         if ext in _IMAGE_EXTENSIONS),
        key=_natural_sort_key,
    )
    if not image_names:
        # Nothing to put on a page: create neither a backup nor a PDF.
        print(f'No images found, ignoring {dir_name}...')
        return

    # Create a backup if necessary
    if backup_dir_root is None:
        backup_dir_root = os.path.join(os.getcwd(), BACKUP_DIR_NAME)
    backup_dir = os.path.join(backup_dir_root, dir_name)
    if not os.path.isdir(backup_dir):
        print(f'Creating backup of {dir_name}')
        shutil.copytree(dir, backup_dir)

    # Create basic pdf file
    pdf_path = os.path.join(dir, f'{dir_name}.pdf')
    pdf = Canvas(pdf_path)
    page_count = 0
    for index, fname in enumerate(image_names):
        file_type = os.path.splitext(fname)[1]
        new_name = f'{dir_name}_{index}{file_type}'
        new_file = os.path.join(dir, new_name)
        # A file that already carries its target name (e.g. on a re-run) is
        # processed as is; only a clash with a *different* file is an error.
        if fname != new_name:
            if os.path.exists(new_file):
                print(f'ERROR: file exists, skipping {fname}')
                continue
            print(f'Renaming {fname} to {new_name}')
            os.rename(os.path.join(dir, fname), new_file)
        image = scale_image(new_file)
        width, height = image.size
        # One page per image, sized to the image's pixel dimensions.
        pdf.setPageSize((width, height))
        pdf.drawImage(new_file, 0, 0, width, height, preserveAspectRatio=True)
        pdf.showPage()
        page_count += 1

    if page_count == 0:
        print(f'No pages created, not writing a pdf for {dir_name}')
        return
    print('Writing pdf (might take a while...)')
    pdf.save()
# Counter-clockwise rotation (in degrees, as expected by Image.rotate) that
# brings an image with the given EXIF orientation value upright.
# Mirrored orientations are not handled; see
# https://www.impulseadventure.com/photo/exif-orientation.html
_EXIF_ROTATIONS = {3: 180, 6: 270, 8: 90}


def _exif_orientation(image):
    """Return the EXIF 'Orientation' value of ``image``, or None if it has none."""
    reader = getattr(image, 'getexif', None) or getattr(image, '_getexif', None)
    if reader is None:
        return None
    exif_data_raw = reader()
    if not exif_data_raw:
        # Images without EXIF data yield None or an empty mapping.
        return None
    for tag, value in exif_data_raw.items():
        if ExifTags.TAGS.get(tag, tag) == 'Orientation':
            return value
    return None


def scale_image(image_path):
    """Rotate an image upright, shrink it to the Dpi150 size and overwrite the file.

    The image is only ever scaled down and its aspect ratio is kept. The
    result is saved back to ``image_path`` with ``JPG_COMPRESSION`` as
    quality setting.

    Args:
        image_path: Path of the image file; the file is modified in place.

    Returns:
        The processed PIL image.
    """
    with Image.open(image_path) as image:
        # Read the pixel data now so the file handle can be released before
        # the same path is overwritten below.
        image.load()
        orientation = _exif_orientation(image)

    degrees = _EXIF_ROTATIONS.get(orientation)
    if degrees:
        image = image.rotate(degrees, expand=True)

    # Choose the bounding box only after rotating, so that it matches the
    # orientation the image finally has.
    w, h = image.size
    size = (1754, 1240) if w > h else (1240, 1754)  # Dpi150
    # size = (842, 595) if w > h else (595, 842)  # Dpi72
    # LANCZOS is the filter formerly also available as Image.ANTIALIAS.
    image.thumbnail(size, Image.LANCZOS)
    image.save(image_path, optimize=True, quality=JPG_COMPRESSION)
    return image
if __name__ == '__main__':
    # Process the folders of the directory the script is started from.
    root_dir = os.getcwd()
    backup_dir = os.path.join(root_dir, BACKUP_DIR_NAME)
    # Create the main backup directory as required; exist_ok makes a separate
    # existence check unnecessary.
    os.makedirs(backup_dir, exist_ok=True)
    process_folders(root_dir, backup_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment