Created
May 4, 2020 14:16
-
-
Save carolinux/90c7be289d4b0a584e97d1034b04b148 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires pandoc (to support .txt files), img2pdf (to support images) and pdfjoin (to join the PDFs).
# Stitches the txt, image and pdf files found in a folder together into a single output pdf.
# Filenames must begin with their desired sequence number, e.g. 1_foo.pdf, 2_beach.png, 4_ending_notes.txt.
# There can be gaps in the sequence.
import glob
import os
import subprocess
import sys
# Maps a lower-cased source extension to the external tool that converts it
# to PDF.  Both tools are invoked as `TOOL SOURCE -o TARGET`.
CONVERTERS = {
    ".png": "img2pdf",
    ".jpeg": "img2pdf",
    ".jpg": "img2pdf",
    ".txt": "pandoc",
}


def _run_tool(args):
    """Run an external command and report whether it succeeded.

    Args:
        args: the command as a list (program followed by its arguments).
            It is executed without a shell, so paths containing spaces or
            shell metacharacters are passed through verbatim.

    Returns:
        True if the command exited with status 0, False otherwise.  A
        program that cannot be started (e.g. not installed) is reported on
        stdout and counted as a failure instead of raising.
    """
    try:
        return subprocess.call(args) == 0
    except OSError as err:
        print("could not run {}: {}".format(args[0], err))
        return False


def collect_pdfs(folder):
    """Gather the numbered files of *folder* as PDFs, converting as needed.

    Only files named ``<integer>_<rest>`` are considered.  PDFs are taken
    as-is; images and .txt files are converted to a sibling ``.pdf`` unless
    that sibling already exists (in which case the existing PDF is picked
    up through the directory listing itself).  Everything else is reported
    as skipped.

    Args:
        folder: directory to scan (not recursive).

    Returns:
        A list of ``(sequence_number, pdf_path)`` tuples sorted by sequence
        number, ties broken by path so the order is deterministic.
    """
    numbered_pdfs = []
    # glob.glob returns a complete list up front, so PDFs created by the
    # conversions below are not visited a second time.
    for path in glob.glob(os.path.join(folder, "*")):
        name = os.path.basename(path)
        # Expecting names of the form 12_foo.jpg.
        prefix, separator, _ = name.partition("_")
        if not separator:
            print(path + " skipped")
            continue
        try:
            num = int(prefix)
        except ValueError:
            print(path + " skipped")
            continue

        stem, ext = os.path.splitext(path)
        ext = ext.lower()  # accept .PNG / .JPG / .PDF as well
        if ext == ".pdf":
            numbered_pdfs.append((num, path))
            continue

        converter = CONVERTERS.get(ext)
        if converter is None:
            print(path + " skipped")
            continue

        target_pdf = stem + ".pdf"
        if os.path.exists(target_pdf):
            print(path + " already converted")
            continue
        # Only queue the target when the converter actually produced it;
        # otherwise the join step would be handed a non-existent file.
        if _run_tool([converter, path, "-o", target_pdf]):
            numbered_pdfs.append((num, target_pdf))
        else:
            print(path + " conversion failed, skipped")

    numbered_pdfs.sort()
    return numbered_pdfs


def output_path(folder, out_pdf):
    """Return the path of the joined PDF: ``<folder>/<foldername>_<out_pdf>``.

    The folder name is taken from the absolute path so that ``.`` and
    paths with a trailing slash still yield e.g. ``foo_out.pdf`` under
    ``foo/``.
    """
    base_folder = os.path.basename(os.path.abspath(folder))
    return os.path.join(folder, base_folder + "_" + out_pdf)


def build_join_command(pdf_paths, out_path):
    """Return the pdfjoin argument list that joins *pdf_paths* into *out_path*."""
    return (
        ["pdfjoin"]
        + list(pdf_paths)
        + ["--rotateoversize", "false", "--outfile", out_path]
    )


def main(argv=None):
    """Stitch the numbered files of a folder into one PDF.

    Args:
        argv: command line as ``[program, folder, out_pdf]``; *folder*
            defaults to ``.`` and *out_pdf* to ``out.pdf``.  Defaults to
            ``sys.argv`` when omitted.

    Returns:
        0 if pdfjoin succeeded, 1 if there was nothing to join or pdfjoin
        failed.
    """
    argv = sys.argv if argv is None else argv
    folder = argv[1] if len(argv) > 1 else "."
    out_pdf = argv[2] if len(argv) > 2 else "out.pdf"

    out_path = output_path(folder, out_pdf)
    out_abs = os.path.abspath(out_path)
    # A previous run's output may itself look like a numbered file (e.g.
    # 2020_out.pdf inside 2020/); never feed it back in as an input.
    pdfs_to_stitch = [
        path
        for _, path in collect_pdfs(folder)
        if os.path.abspath(path) != out_abs
    ]
    if not pdfs_to_stitch:
        print("nothing to stitch in " + folder)
        return 1

    cmd = build_join_command(pdfs_to_stitch, out_path)
    print("Running cmd: {}".format(" ".join(cmd)))
    return 0 if _run_tool(cmd) else 1


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment