Last active
September 4, 2017 08:38
-
-
Save Schwusch/a0cfb8ec68bf4c9253bc3e814c4b85fe to your computer and use it in GitHub Desktop.
Compare pdfs with different versions in bulk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies: Python 3+, ImageMagick (the `convert` and `compare` commands must be on PATH)
import argparse
import errno
import subprocess
from multiprocessing import Pool
from os import listdir, system, makedirs
from os.path import isfile, join, splitext
def make_sure_path_exists(path):
    """Create the directory *path* (and any missing parents) if needed.

    An already-existing directory is left untouched.

    Args:
        path: directory path to create.

    Raises:
        OSError: if the directory cannot be created. This includes the case
            where *path* already exists but is not a directory
            (``FileExistsError``, a subclass of ``OSError``).
    """
    # exist_ok=True tolerates an existing directory but, unlike a blanket
    # EEXIST check, still fails when a regular file occupies the path.
    makedirs(path, exist_ok=True)
def convert(file):
    """Render one input file to PNG images with ImageMagick's ``convert``.

    Args:
        file: a two-item sequence ``[filename, folder]``. The input is read
            from ``./<folder>/<filename>`` and the output pattern is
            ``./<folder>_out/<stem>-%d.png``, where ``<stem>`` is the
            filename without its extension.

    The exit status of ``convert`` is not checked.

    Raises:
        FileNotFoundError: if the ``convert`` executable cannot be found.
    """
    filename, folder = file[0], file[1]
    stem = splitext(filename)[0]
    source = "./{0}/{1}".format(folder, filename)
    # "%d" is left in the output name on purpose: it is passed through to
    # ImageMagick unchanged, exactly as the original command line did.
    target = "./{0}_out/{1}-%d.png".format(folder, stem)
    # An argument list (no shell) keeps filenames containing spaces or shell
    # metacharacters from breaking, or being interpreted by, the command line.
    subprocess.call(
        ["convert", "-density", "150", source, "-quality", "100", target]
    )
def compare(file):
    """Diff one page image between the new and old output folders.

    Runs ImageMagick's ``compare`` on ``./<new>_out/<file>`` and
    ``./<old>_out/<file>`` and writes the result to ``./compare_out/<file>``,
    where ``<new>`` and ``<old>`` come from the module-level ``args``.

    Args:
        file: image filename that exists in both output folders.

    The exit status of ``compare`` is not checked.

    Raises:
        FileNotFoundError: if the ``compare`` executable cannot be found.
    """
    new_image = "./{0}_out/{1}".format(args.new, file)
    old_image = "./{0}_out/{1}".format(args.old, file)
    diff_image = "./compare_out/{0}".format(file)
    # An argument list (no shell) keeps filenames containing spaces or shell
    # metacharacters from breaking, or being interpreted by, the command line.
    subprocess.call(["compare", new_image, old_image, diff_image])
def convert_all_pdfs_to_png():
    """Convert every file in the new and old folders to PNG, in parallel.

    Regular files directly inside ``args.new`` and ``args.old`` are collected
    as ``[filename, folder]`` jobs (new folder first). The output folders
    ``./<new>_out``, ``./<old>_out`` and ``./compare_out`` are then created if
    missing, and the jobs are handed to ``convert`` on a pool of 8 worker
    processes.
    """
    jobs = []
    for folder in (args.new, args.old):
        for entry in listdir(folder):
            if isfile(join(folder, entry)):
                jobs.append([str(entry), folder])

    for folder in (args.new, args.old):
        make_sure_path_exists("./{}_out".format(folder))
    make_sure_path_exists("./compare_out")

    with Pool(8) as workers:
        workers.map(convert, jobs)
def compare_old_and_new_pngs():
    """Diff every image present in both output folders and report the rest.

    Files found in both ``./<new>_out`` and ``./<old>_out`` (folder names
    taken from the module-level ``args``) are passed to ``compare`` one by
    one, in sorted filename order so that runs are reproducible. Files that
    exist in only one of the two folders are not compared; their names are
    printed per folder, in directory-listing order.
    """
    # Build the output folder paths once instead of once per directory entry.
    new_dir = "./{}_out".format(args.new)
    old_dir = "./{}_out".format(args.old)
    new_png_files = [f for f in listdir(new_dir) if isfile(join(new_dir, f))]
    old_png_files = [f for f in listdir(old_dir) if isfile(join(old_dir, f))]

    common = set(new_png_files) & set(old_png_files)
    for png in sorted(common):
        compare(png)

    # Filter against the set rather than calling list.remove() per match,
    # which made the bookkeeping quadratic in the number of images.
    leftovers = (
        (args.new, [f for f in new_png_files if f not in common]),
        (args.old, [f for f in old_png_files if f not in common]),
    )
    for folder, unmatched in leftovers:
        if unmatched:
            print("---------------------------")
            print("Images not processed in \"./{}\":".format(folder))
            for png in unmatched:
                print(png)
def run():
    """Run the two pipeline stages in order, printing progress around each.

    First converts the input files to PNG, then compares the new and old
    PNG outputs.
    """
    stages = (
        ("Converting PDF files to PNG...", convert_all_pdfs_to_png, "Done converting."),
        ("Comparing outputs from new and old...", compare_old_and_new_pngs, "Done."),
    )
    for announcement, stage, completion in stages:
        print(announcement)
        stage()
        print(completion)
# Command-line options are parsed at module level because functions in this
# file read the resulting ``args`` object as a global.
parser = argparse.ArgumentParser(description='Compare different versions of pdfs and see the difference.')
parser.add_argument('--old', nargs='?', help='old pdfs folder', default="old")
parser.add_argument('--new', nargs='?', help='new pdfs folder', default="new")
args = parser.parse_args()

# Only start the pipeline when executed as a script. With the multiprocessing
# "spawn" start method, each worker process re-imports the main module; an
# unguarded run() here would be re-entered in every worker.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment