This was originally taken from https://superuser.com/a/1012669/894282 and then updated to be compatible with Python 3.x.
Requires pypdf (the successor to PyPDF2) to be installed.
This was originally taken from https://superuser.com/a/1012669/894282 and then updated to be compatible with Python 3.x.
Requires pypdf (the successor to PyPDF2) to be installed.
#! /usr/bin/env python
# Original author Nicholas Kim, modified by Yan Pashkovsky
# New license - GPL v3
import sys
import time
from contextlib import ExitStack
from pathlib import Path

# from PyPDF2 import PdfReader, PdfWriter
# try:
#     from PyPDF2.utils import PdfReadError
# except ImportError:
#     from PyPDF2._reader import PdfReadError
from pypdf import PdfReader, PdfWriter

try:
    from pypdf.utils import PdfReadError
except ImportError:
    from pypdf._reader import PdfReadError
def eprint(*args, **kwargs):
    """Print to stderr.

    Accepts the same positional and keyword arguments as the built-in
    ``print`` except ``file``: the destination is always ``sys.stderr``, so
    passing ``file=`` raises ``TypeError`` (duplicate keyword argument).

    Taken from https://stackoverflow.com/a/14981125/7564988
    """
    print(*args, file=sys.stderr, **kwargs)
def get_cmdline_arguments(argv=None):
    """Retrieve command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``optparse`` falls back to ``sys.argv[1:]``.

    Returns:
        A ``(options, args)`` pair as returned by ``OptionParser.parse_args``:
        ``options`` carries ``output_filename``, ``bookmark_file`` and
        ``verbose``; ``args`` is the list of input file names.

    Raises:
        SystemExit: with status 1 (after printing the help text) when fewer
            than two input files are given.
    """
    from optparse import OptionParser

    # Input files are separate, space-delimited arguments.
    usage_string = (
        "%prog [-v | -q] [-o output_name] [-b bookmarks_file] file1 file2 [...]"
    )
    parser = OptionParser(usage_string)
    parser.add_option(
        "-o", "--output",
        dest="output_filename",
        # Evaluated once, when the parser is built.
        default=time.strftime("output_%Y%m%d_%H%M%S"),
        help="Specify output filename (exclude .pdf extension); default is current date/time stamp",
    )
    parser.add_option(
        "-b", "--bookmarks",
        dest="bookmark_file",
        default=None,
        help="Specify the bookmark names for each file. The file should be new-line delimited and the number of lines must match the number of input files. If not given, the name of each file will be used as the bookmark name.",
    )
    # -v and -q share one destination; whichever appears last on the command
    # line wins, and verbose output is the default.
    parser.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=True,
        help="Print detailed output (undoes quiet)",
    )
    parser.add_option(
        "-q", "--quiet",
        action="store_false", dest="verbose", default=True,
        help="Do not print detailed output (undoes verbose)",
    )

    options, args = parser.parse_args(argv)
    # Merging needs at least two inputs.
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)
    return options, args
def _load_bookmark_names(bookmark_file, filenames):
    """Return one bookmark title per input file, read from *bookmark_file*.

    The file is split into lines; each line is the title for the input file
    at the same position. Prints a message to stderr and exits with status 1
    when the file cannot be read or when its line count differs from
    ``len(filenames)``.
    """
    bookmark_path = Path(bookmark_file)
    try:
        with bookmark_path.open() as file:
            names = file.read().splitlines()
    except FileNotFoundError:
        eprint(f"Bookmark file '{bookmark_file}' does not exist.")
        sys.exit(1)
    except OSError as err:
        eprint(f"Bookmark file '{bookmark_file}' could not be read: {err}")
        sys.exit(1)

    if len(names) != len(filenames):
        eprint(
            f"Number of bookmarks in '{bookmark_path}' ({len(names)}) "
            f"does not match the number of files ({len(filenames)})"
        )
        sys.exit(1)
    return names


def main():
    """Merge the PDFs named on the command line into one bookmarked PDF.

    Every input file is appended page by page to a single writer, and an
    outline item (bookmark) pointing at the first page of each input is
    added. Bookmark titles come from the ``-b`` file when given, otherwise
    the input file names are used. The result is written to
    ``<output_filename>.pdf``.

    Exits with status 1, after printing a message to stderr, when the
    bookmark file is unusable or an input file cannot be opened or parsed.
    Errors while writing the output file are not caught and propagate.
    """
    options, filenames = get_cmdline_arguments()
    verboseprint = print if options.verbose else lambda *a, **k: None
    output_pdf_name = options.output_filename + ".pdf"

    verboseprint(f"Output filename: {output_pdf_name}")
    verboseprint("Input filenames:")
    for name in filenames:
        verboseprint(f"\t{name}")

    # gather bookmark names
    if options.bookmark_file:
        bookmarks = _load_bookmark_names(options.bookmark_file, filenames)
        verboseprint("Bookmark Names:")
        for title in bookmarks:
            verboseprint(f"\t{title}")
    else:
        bookmarks = list(filenames)
        verboseprint("Bookmark Names: Same as filenames")

    # The input handles are kept open until the output has been written and
    # are closed on every exit path, including the sys.exit() calls below.
    with ExitStack() as open_inputs:
        # get PDF files
        readers = []
        for name in filenames:
            try:
                handle = open_inputs.enter_context(open(name, "rb"))
                readers.append(PdfReader(handle))
            except PdfReadError:
                eprint(f"{name} is not a valid PDF file.")
                sys.exit(1)
            except FileNotFoundError:
                eprint(f"{name} could not be found.")
                sys.exit(1)
            except OSError as err:
                eprint(f"{name} could not be opened: {err}")
                sys.exit(1)

        # merge page by page
        writer = PdfWriter()
        next_page_index = 0  # index in the output of the next page to add
        for name, title, reader in zip(filenames, bookmarks, readers):
            verboseprint(f"Adding {name} to output")
            for offset, page in enumerate(reader.pages):
                writer.add_page(page)
                if offset == 0:
                    # Bookmark the first page of each input; an input with
                    # no pages therefore gets no bookmark.
                    writer.add_outline_item(str(title), next_page_index)
            next_page_index += len(reader.pages)

        # create output pdf file
        verboseprint("Writing output file...")
        with open(output_pdf_name, "wb") as output_pdf_file:
            writer.write(output_pdf_file)

    print(f"{output_pdf_name} successfully created.")
# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The latest update adds the ability to customize the bookmark labels (via a bookmarks file passed with -b) and adds more verbose output, which can be disabled with -q.