Created
August 10, 2022 12:26
-
-
Save Burekasim/cb9686195ebd4e047f62b835770a5d40 to your computer and use it in GitHub Desktop.
Pdf merger (Regina 2000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os
import re

from PyPDF2 import PdfFileMerger
def get_pdf_files_from_current_folder(current_folder: str):
    """Return the names of the PDF files directly inside *current_folder*.

    Only regular files are returned (a directory that happens to be named
    ``something.pdf`` is skipped) and the ``.pdf`` extension is matched
    case-insensitively, so ``scan.PDF`` is included as well.

    The result is in natural order: runs of digits are compared as numbers,
    so ``2b.pdf`` comes before ``10a.pdf``. This lets callers control the
    merge order by prefixing file names with a number.

    Args:
        current_folder: Path of the directory to scan (not recursive).

    Returns:
        A list of file names (not full paths), naturally sorted.

    Raises:
        OSError: If *current_folder* cannot be listed.
    """
    def natural_key(name):
        # re.split with a capturing group always alternates
        # text, digits, text, ... so odd positions hold the digit runs and
        # ints are only ever compared with ints.
        parts = re.split(r'(\d+)', name)
        tokens = [
            int(part) if index % 2 else part
            for index, part in enumerate(parts)
        ]
        # The raw name breaks ties such as '01.pdf' vs '1.pdf' so the
        # ordering never depends on os.listdir()'s arbitrary order.
        return tokens, name

    return sorted(
        (
            item
            for item in os.listdir(current_folder)
            if item.lower().endswith('.pdf')
            and os.path.isfile(os.path.join(current_folder, item))
        ),
        key=natural_key,
    )
if __name__ == '__main__':
    # Merge every PDF in the current working directory into a single
    # time-stamped file written to that same directory.
    folder = os.getcwd()
    pdfs = [
        pdf
        for pdf in get_pdf_files_from_current_folder(folder)
        # The output lands in the folder being scanned; skip results of
        # earlier runs so they are not merged into the new file again.
        if not pdf.lower().endswith('-regina2000.pdf')
    ]
    if not pdfs:
        # Nothing to merge: exit with a message instead of writing an
        # empty PDF.
        raise SystemExit(f'No PDF files to merge in {folder}')

    merger = PdfFileMerger()
    try:
        for pdf in pdfs:
            # Full paths make the merge independent of later cwd changes.
            merger.append(os.path.join(folder, pdf))
        current_time = datetime.datetime.now().strftime('%H%M%S')
        merger.write(os.path.join(folder, f'{current_time}-regina2000.pdf'))
    finally:
        # Release the input file handles even if append/write fails.
        merger.close()
Hi @lore1734,
In line 11, the function returns a sorted list of the files.
Let's say you have 3 files:
b.pdf
d.pdf
a.pdf
If you add a number at the beginning of each file name, it will determine the order of the files in the merged PDF:
1d.pdf - first page.
2b.pdf - second page.
3a.pdf - third page.
And now there is a repository
https://github.com/Burekasim/Regina2000-pdf-merger
I've updated the repo with natural sort instead of the sorted function.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi!
Is there any way to control the order of the pages in the merged file? I have the code below to combine PDFs with similar names, but I am not sure how to select which file becomes the first page of the combined PDF.
thank you
from pathlib import Path
from PyPDF2 import PdfFileMerger, PdfFileReader # pip install PyPDF2
# Merge PDFs that share the same three-character file-name prefix into one
# output file per prefix.
pdf_dir = Path("PYTHON").parent / "pdf_files"
pdf_output_dir = Path("PYTHON").parent / "OUPUT"
pdf_output_dir.mkdir(parents=True, exist_ok=True)

# A file whose name is at least this long supplies the output file name.
BASE_FILE_NAME_LENGTH = 20

# glob() yields files in arbitrary order. Sorting by name makes the page
# order deterministic, so renaming a file (e.g. adding a numeric suffix
# after the prefix) controls where it lands in the merged PDF.
pdf_files = sorted(pdf_dir.glob("*.pdf"), key=lambda path: path.name)

# Group the files by their three-character prefix in a single pass.
groups = {}
for file in pdf_files:
    groups.setdefault(file.name[:3], []).append(file)

for key, group in groups.items():
    # Default to the first file's name so the output name is always defined
    # and never leaks over from the previous prefix; the last file whose
    # name is long enough overrides it.
    base_file_name = group[0].name
    for file in group:
        if len(file.name) >= BASE_FILE_NAME_LENGTH:
            base_file_name = file.name

    merger = PdfFileMerger()
    try:
        for file in group:
            # Pass the path directly: PdfFileReader's second positional
            # argument is `strict`, not a file mode such as "rb".
            merger.append(str(file))
        merger.write(str(pdf_output_dir / base_file_name))
    finally:
        # Release the input file handles even if append/write fails.
        merger.close()