This was originally taken from https://superuser.com/a/1012669/894282 and then updated to be compatible with Python 3.x.
Requires pypdf to be installed (the script imports from `pypdf`; the earlier PyPDF2 imports are commented out).
This was originally taken from https://superuser.com/a/1012669/894282 and then updated to be compatible with Python 3.x.
Requires pypdf to be installed (the script imports from `pypdf`; the earlier PyPDF2 imports are commented out).
| #! /usr/bin/env python | |
| # Original author Nicholas Kim, modified by Yan Pashkovsky | |
| # New license - GPL v3 | |
import sys
import time
from contextlib import ExitStack
from pathlib import Path

# from PyPDF2 import PdfReader, PdfWriter
# try:
#     from PyPDF2.utils import PdfReadError
# except ImportError:
#     from PyPDF2._reader import PdfReadError
from pypdf import PdfReader, PdfWriter

try:
    from pypdf.utils import PdfReadError
except ImportError:
    from pypdf._reader import PdfReadError
def eprint(*args, **kwargs):
    """Write a message to standard error.

    Positional and keyword arguments are forwarded unchanged to the
    built-in ``print``, with ``file`` set to ``sys.stderr``.

    Taken from https://stackoverflow.com/a/14981125/7564988
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
def get_cmdline_arguments():
    """Parse the command line and return ``(options, args)``.

    Recognised options:
        -o / --output     Output filename without the ``.pdf`` extension,
                          stored as ``options.output_filename``. Defaults to
                          an ``output_YYYYmmdd_HHMMSS`` timestamp computed
                          when this function is called.
        -b / --bookmarks  Path to a newline-delimited file of bookmark names,
                          stored as ``options.bookmark_file`` (``None`` when
                          omitted).
        -v / --verbose    Set ``options.verbose`` to True (the default).
        -q / --quiet      Set ``options.verbose`` to False.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args``;
        ``args`` is the list of positional arguments (the input filenames).

    Exits:
        Prints the help text and calls ``sys.exit(1)`` when fewer than two
        positional arguments are given.
    """
    from optparse import OptionParser

    usage_string = "%prog [-v] [-o output_name] [-b bookmarks_file] file1, file2 [, ...]"
    parser = OptionParser(usage_string)
    parser.add_option(
        "-o", "--output",
        dest="output_filename",
        default=time.strftime("output_%Y%m%d_%H%M%S"),
        help="Specify output filename (exclude .pdf extension); default is current date/time stamp",
    )
    parser.add_option(
        "-b", "--bookmarks",
        dest="bookmark_file",
        default=None,
        # Help text previously read "number of lies"; corrected to "lines".
        help="Specify the bookmark names for each file. The file should be new-line delimited and the number of lines must match the number of input files. If not given, the name of each file will be used as the bookmark name.",
    )
    # -v and -q share one destination so the last flag given wins.
    parser.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=True,
        help="Print detailed output (undoes quiet)",
    )
    parser.add_option(
        "-q", "--quiet",
        action="store_false", dest="verbose", default=True,
        help="Do not print detailed output (undoes verbose)",
    )

    options, args = parser.parse_args()
    # Merging needs at least two inputs; otherwise show usage and bail out.
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)
    return options, args
def main():
    """Merge the PDF files named on the command line into one bookmarked PDF.

    Every input file contributes all of its pages, in command-line order,
    and one outline item (bookmark) that points at its first page. Bookmark
    titles come from the file given with ``-b`` (one title per line, one line
    per input file) or, when no such file is given, from the input filenames.

    The result is written to ``<output_filename>.pdf``. Progress messages are
    printed unless ``-q`` was given; the final success message is always
    printed.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the
        bookmark file is missing, when its line count differs from the
        number of input files, or when an input file cannot be opened or is
        not a valid PDF.
    """
    options, filenames = get_cmdline_arguments()
    verboseprint = print if options.verbose else lambda *a, **k: None
    output_pdf_name = options.output_filename + ".pdf"

    verboseprint(f"Output filename: {output_pdf_name}")
    verboseprint("Input filenames:")
    for name in filenames:
        verboseprint(f"\t{name}")

    # gather bookmark names
    if options.bookmark_file:
        bookmark_path = Path(options.bookmark_file)
        # Explicit checks instead of assert: assertions are stripped under
        # ``python -O``, which would silently skip this input validation.
        try:
            bookmarks = bookmark_path.read_text().splitlines()
        except FileNotFoundError:
            eprint(f"Bookmark file '{options.bookmark_file}' does not exist.")
            sys.exit(1)
        if len(bookmarks) != len(filenames):
            eprint(
                f"Number of bookmarks in '{bookmark_path}' ({len(bookmarks)}) "
                f"does not match the number of files ({len(filenames)})"
            )
            sys.exit(1)
        verboseprint("Bookmark Names:")
        for title in bookmarks:
            verboseprint(f"\t{title}")
    else:
        bookmarks = list(filenames)
        verboseprint("Bookmark Names: Same as filenames")

    # The input handles stay open until the output has been written, because
    # the readers are still consulted while pages are added and serialized;
    # ExitStack then closes all of them, including on an early exit.
    with ExitStack() as open_inputs:
        # get PDF files
        files_to_merge = []
        for name in filenames:
            try:
                handle = open_inputs.enter_context(open(name, "rb"))
                files_to_merge.append(PdfReader(handle))
            except PdfReadError:
                eprint(f"{name} is not a valid PDF file.")
                sys.exit(1)
            except OSError:
                eprint(f"{name} could not be found.")
                sys.exit(1)

        # merge page by page
        output_pdf_stream = PdfWriter()
        next_page_index = 0  # index in the output of the next page to be added
        for name, title, reader in zip(filenames, bookmarks, files_to_merge):
            verboseprint(f"Adding {name} to output")
            for position, page in enumerate(reader.pages):
                output_pdf_stream.add_page(page)
                if position == 0:
                    # Bookmark the first page of each input file; the page
                    # is added first so the index already exists.
                    output_pdf_stream.add_outline_item(str(title), next_page_index)
                next_page_index += 1

        # create output pdf file
        verboseprint("Writing output file...")
        # ``with`` closes the file and, unlike the old try/finally, does not
        # raise NameError when opening the output file itself fails.
        with open(output_pdf_name, "wb") as output_pdf_file:
            output_pdf_stream.write(output_pdf_file)

    print(f"{output_pdf_name} successfully created.")


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The latest update adds the ability to customize the bookmark labels and adds more verbose output (which can be disabled with the -q option).