Last active
August 30, 2017 10:25
-
-
Save PolarNick239/154dd67d69d4181deb1903ad2ecd5a66 to your computer and use it in GitHub Desktop.
PDF from images. Script downscales all jpg images and then merges them into pdfs with pdftk.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3.5 | |
import glob
import os
import pathlib
import subprocess
from pathlib import Path
# How to use:
# Run this script in a directory that contains folders named with integers, from 1 up to 99. Each folder should contain *.JPG files.
# The script downscales every image that has not been downscaled yet into the "downscaled" subdirectory, then stitches the downscaled images into PDFs: one PDF per folder plus a single combined PDF.
# Example of input directories: | |
#├── 1 | |
#│ ├── DSCF2853.JPG | |
#│ ├── DSCF2854.JPG | |
#│ ├── DSCF2855.JPG | |
#│ ├── DSCF2856.JPG | |
#│ └── DSCF2857.JPG | |
#├── 2 | |
#│ ├── DSCF2858.JPG | |
#│ ├── DSCF2859.JPG | |
#│ ├── DSCF2860.JPG | |
#│ └── DSCF2861.JPG | |
#├── 3 | |
#│ ├── DSCF2862.JPG | |
#│ ├── DSCF2863.JPG | |
#│ ├── DSCF2864.JPG | |
#│ ├── DSCF2865.JPG | |
#│ └── DSCF2866.JPG | |
#└── stitch_pdf.py | |
# | |
# Example of output: | |
#├── 1 | |
#│ ├── DSCF2853.JPG | |
#│ ├── DSCF2854.JPG | |
#│ ├── DSCF2855.JPG | |
#│ ├── DSCF2856.JPG | |
#│ └── DSCF2857.JPG | |
#├── 2 | |
#│ ├── DSCF2858.JPG | |
#│ ├── DSCF2859.JPG | |
#│ ├── DSCF2860.JPG | |
#│ └── DSCF2861.JPG | |
#├── 3 | |
#│ ├── DSCF2862.JPG | |
#│ ├── DSCF2863.JPG | |
#│ ├── DSCF2864.JPG | |
#│ ├── DSCF2865.JPG | |
#│ └── DSCF2866.JPG | |
#├── downscaled | |
#│ ├── 1 | |
#│ │ ├── DSCF2853.JPG | |
#│ │ ├── DSCF2854.JPG | |
#│ │ ├── DSCF2855.JPG | |
#│ │ ├── DSCF2856.JPG | |
#│ │ └── DSCF2857.JPG | |
#│ ├── 2 | |
#│ │ ├── DSCF2858.JPG | |
#│ │ ├── DSCF2859.JPG | |
#│ │ ├── DSCF2860.JPG | |
#│ │ └── DSCF2861.JPG | |
#│ └── 3 | |
#│ ├── DSCF2862.JPG | |
#│ ├── DSCF2863.JPG | |
#│ ├── DSCF2864.JPG | |
#│ ├── DSCF2865.JPG | |
#│ └── DSCF2866.JPG | |
#├── stitch_pdf.py | |
#└── pdfs | |
# ├── theory01.pdf | |
# ├── theory02.pdf | |
# ├── theory03.pdf | |
# └── theory.pdf | |
def find_day_dirs(root="."):
    """Return the day directories directly under *root*, ordered by day number.

    A day directory is a directory whose name parses as an integer with
    ``int()``. Plain files are ignored even when their name is numeric,
    because the later steps glob for images inside each returned path.

    The result is sorted by the integer value of the name, so "2" comes
    before "10" (a plain string sort would put "./10" first).

    Each returned path is *root* joined with the entry name, e.g. "./1"
    for the default root.
    """
    numbered = []
    for candidate in glob.glob(os.path.join(root, "*")):
        if not os.path.isdir(candidate):
            continue
        try:
            number = int(os.path.basename(candidate))
        except ValueError:
            # Not a day folder (e.g. "downscaled", "pdfs", the script itself).
            continue
        numbered.append((number, candidate))
    numbered.sort()
    return [path for _, path in numbered]


day_paths = find_day_dirs(".")

print("Directories to be processed: ")
for day_path in day_paths:
    print(day_path)
# Downscale the images of every day directory in day_paths and stitch them
# into PDFs: one "theoryNN.pdf" per day plus a combined "theory.pdf".
# External tools used: ImageMagick "convert" and "pdftk".

# Output locations, relative to the directory the script is run from.
pdf_dir = "./pdfs"
downscaled_root = "./downscaled"


def run_tool(args):
    """Run an external program and report whether it succeeded.

    *args* is the full argument list, program name first. It is handed to
    the program directly, without a shell, so file names containing spaces
    or shell metacharacters are passed through unchanged.

    Returns True when the program exits with status 0. A program that
    cannot be started, or that exits with a non-zero status, is reported on
    stdout and yields False; nothing is raised, so a single failure does
    not abort the rest of the run.
    """
    try:
        exit_code = subprocess.run(args).returncode
    except OSError as error:
        print("Could not run {}: {}".format(args[0], error))
        return False
    if exit_code != 0:
        print("{} exited with status {}".format(args[0], exit_code))
        return False
    return True


pathlib.Path(pdf_dir).mkdir(parents=True, exist_ok=True)

# (day_number, pdf_path) for every per-day PDF that exists after processing.
day_pdfs = []
for day_path in day_paths:
    day_number = int(os.path.basename(day_path))
    print("Processing day {}...".format(day_number))

    day_images = sorted(glob.glob(day_path + "/*.JPG"))
    if not day_images:
        # With no inputs "convert" fails and no PDF is written; listing that
        # missing PDF for pdftk would break the combined PDF as well.
        print("No *.JPG images found, skipping!")
        continue

    downscaled_dir = downscaled_root + "/" + str(day_number)
    pathlib.Path(downscaled_dir).mkdir(parents=True, exist_ok=True)

    skipped = True  # stays True only if every image was already downscaled
    downscaled_images = []
    for day_image in day_images:
        downscaled_path = downscaled_dir + "/" + os.path.basename(day_image)
        if not Path(downscaled_path).is_file():
            skipped = False
            run_tool(["convert", "-resize", "25%", "-quality", "90%",
                      day_image, downscaled_path])
            if not Path(downscaled_path).is_file():
                # Nothing was written for this image; leave it out so the
                # per-day PDF can still be built from the remaining images.
                continue
        downscaled_images.append(downscaled_path)

    if skipped:
        print("Images already downscaled!")
    else:
        print("Images downscaled!")

    if not downscaled_images:
        print("No downscaled images available, skipping!")
        continue

    pdf_file = pdf_dir + "/theory{:02}.pdf".format(day_number)
    run_tool(["convert"] + downscaled_images + [pdf_file])
    # Only PDFs that actually exist are merged, so one failed day does not
    # make pdftk reject the whole input list.
    if Path(pdf_file).is_file():
        day_pdfs.append((day_number, pdf_file))

# Order by day number rather than by file name, so the order stays correct
# even if a day number needs more digits than the zero-padding provides.
pdf_files = [pdf_path for _, pdf_path in sorted(day_pdfs)]
if pdf_files:
    run_tool(["pdftk"] + pdf_files + ["cat", "output", pdf_dir + "/theory.pdf"])
else:
    print("No PDFs were produced, nothing to merge!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment