-
-
Save DIYer22/b9ede6b5b96109788a47973649645c1f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Usage/help text for the script; it is printed unconditionally on start-up.
# (Stray " | |" table residue that had leaked into every line of the literal
# and after the print call has been removed so the module parses again.)
helpDoc = '''
Add Page Number to PDF file with Python
Python 给 PDF 添加 页码
usage:
python addPageNumberToPDF.py [PDF path]
require:
pip install reportlab pypdf2
Support both Python2/3, But more recommend Python3
tips:
* output file will save at pdfWithNumbers/[PDF path]_page.pdf
* only support A4 size PDF
* tested on Python2/Python3@ubuntu
* more large size of PDF require more RAM
* if segmentation fault, plaese try use Python 3
* if generate PDF document is damaged, plaese try use Python 3
Author:
Lei Yang ([email protected])
GitHub:
https://gist.github.com/DIYer22/b9ede6b5b96109788a47973649645c1f
'''
print(helpDoc)
import reportlab | |
from reportlab.lib.units import mm | |
from reportlab.pdfgen import canvas | |
from PyPDF2 import PdfFileWriter, PdfFileReader | |
def createPagePdf(num, tmp):
    """Write a PDF to ``tmp`` whose i-th page contains only the text ``i``.

    The resulting file is meant to be overlaid page-by-page onto another
    document so that each page receives its page number.

    Args:
        num: Number of pages to generate; pages are numbered 1..num.
        tmp: Path of the PDF file to write.

    Returns:
        None. (The original body had a bare ``return`` followed by
        unreachable code that re-opened the file; that dead code is removed.)
    """
    c = canvas.Canvas(tmp)
    for i in range(1, num + 1):
        # x = 105 mm is the midpoint of a 210 mm wide page; y = 4 mm.
        # NOTE(review): presumably relies on reportlab's default A4 page and
        # lower-left origin -- the help text says only A4 is supported.
        c.drawString((210 // 2) * mm, 4 * mm, str(i))
        c.showPage()  # finish this page and start the next one
    c.save()
if __name__ == "__main__":
    # Script entry point: overlay a page number on every page of the input
    # PDF and write the result into a "pdfWithNumbers" directory that sits
    # next to the input file.
    import os
    import sys

    # The input path is argv[1]; without an argument fall back to a sample
    # file name and give up (exit status 1) if that file is not present.
    path = 'MLDS17f.pdf'
    if len(sys.argv) == 1:
        if not os.path.isfile(path):
            sys.exit("error: no PDF path given and default file %r not found" % path)
    else:
        path = sys.argv[1]

    # Build output names from the path components instead of str.replace /
    # fixed-width slicing, which broke when the basename also occurred in a
    # directory name or the extension was not exactly four characters.
    out_dir = os.path.join(os.path.dirname(path), 'pdfWithNumbers')
    stem, ext = os.path.splitext(os.path.basename(path))

    def _partPath(index):
        # Path of the index-th output file.
        return os.path.join(out_dir, stem + '_page_%d' % index + ext)

    tmp = "__tmp.pdf"  # scratch PDF holding the number overlays
    # Pages per output file; 0 means "do not split". Set to a positive k to
    # start a new output file every k pages.
    batch = 0
    output = PdfFileWriter()
    try:
        with open(path, 'rb') as src:
            pdf = PdfFileReader(src, strict=False)
            n = pdf.getNumPages()
            if batch == 0:
                # A negative modulus makes `p % batch` non-zero for every
                # 0 < p < n, so the split branch below never fires.
                batch = -n
            createPagePdf(n, tmp)
            # Create the directory where the outputs are actually written
            # (previously it was created in the current working directory,
            # which failed for inputs located elsewhere).
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)
            with open(tmp, 'rb') as ftmp:
                numberPdf = PdfFileReader(ftmp)
                for p in range(n):
                    if not p % batch and p:
                        # A full batch has been collected: flush it and
                        # start a fresh writer.
                        with open(_partPath(p // batch), 'wb') as dst:
                            output.write(dst)
                        output = PdfFileWriter()
                    print('page: %d of %d' % (p, n))
                    page = pdf.getPage(p)
                    page.mergePage(numberPdf.getPage(p))
                    output.addPage(page)
                # Write whatever is left. `p` is the last loop index; the
                # guard ensures the loop ran at least once. In the unsplit
                # case the suffix is _page_0 (or _page_1 for a one-page
                # input), exactly as before.
                if output.getNumPages():
                    with open(_partPath(p // batch + 1), 'wb') as dst:
                        output.write(dst)
    finally:
        # Always remove the scratch file, even when processing failed.
        if os.path.exists(tmp):
            os.remove(tmp)
@gmsebra
Thank you for sharing your case!
I wrote this code a long time ago, and I am no longer familiar with it or with pypdf2. So I am sorry, but I don't know what is happening either.
Fair enough, thanks!
Hi brother, I would like to check, at what line does your code define the pdf input? also what is the tmp variable?
The code gets the input file from the command line in line 58:
path = sys.argv[1]
The tmp variable just defines the name of a temporary file that is written during the process and erased at the end.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Help/usage banner. It is emitted on every run, before any argument handling.
helpDoc = """
Add Page Number to PDF file with Python
Python 给 PDF 添加 页码
usage:
python addPageNumberToPDF.py [PDF path]
require:
pip install reportlab pypdf2
Support both Python2/3, But more recommend Python3
tips:
* output file will save at pdfWithNumbers/[PDF path]_page.pdf
* only support A4 size PDF
* tested on Python2/Python3@ubuntu
* more large size of PDF require more RAM
* if segmentation fault, plaese try use Python 3
* if generate PDF document is damaged, plaese try use Python 3
Author:
Lei Yang ([email protected])
GitHub:
https://gist.github.com/DIYer22/b9ede6b5b96109788a47973649645c1f
"""
print(helpDoc)
import reportlab
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from PyPDF2 import PdfWriter, PdfReader
def createPagePdf(num, tmp):
    """Generate the page-number overlay PDF and return its first page.

    Writes a PDF to ``tmp`` with one page per number in 1..num, each page
    holding just that number as text, then re-opens the file and returns
    page 0 of it.

    Args:
        num: How many numbered pages to produce.
        tmp: Destination path of the overlay PDF.

    Returns:
        The first page object of the freshly written PDF.
        NOTE(review): the file handle is closed by the time the caller gets
        the page -- confirm the page is still usable if anyone relies on it.
    """
    sheet = canvas.Canvas(tmp)
    # Text anchor: x = 105 mm (half of 210 mm), y = 4 mm.
    x_pos = (210 // 2) * mm
    y_pos = (4) * mm
    page_no = 1
    while page_no <= num:
        sheet.drawString(x_pos, y_pos, str(page_no))
        sheet.showPage()  # close the current page
        page_no += 1
    sheet.save()
    with open(tmp, 'rb') as handle:
        return PdfReader(handle).pages[0]
if __name__ == "__main__":
    # Script entry point: overlay a page number on every page of the input
    # PDF and write the result into a "pdfWithNumbers" directory that sits
    # next to the input file. (The guard previously read `name == "main"`,
    # a markdown-mangled form that raised NameError.)
    import os
    import sys

    # The input path is argv[1]; without an argument fall back to a sample
    # file name and give up (exit status 1) if that file is not present.
    path = 'MLDS17f.pdf'
    if len(sys.argv) == 1:
        if not os.path.isfile(path):
            sys.exit("error: no PDF path given and default file %r not found" % path)
    else:
        path = sys.argv[1]

    # Build output names from the path components instead of str.replace /
    # fixed-width slicing, which broke when the basename also occurred in a
    # directory name or the extension was not exactly four characters.
    out_dir = os.path.join(os.path.dirname(path), 'pdfWithNumbers')
    stem, ext = os.path.splitext(os.path.basename(path))

    def _partPath(index):
        # Path of the index-th output file.
        return os.path.join(out_dir, stem + '_page_%d' % index + ext)

    tmp = "__tmp.pdf"  # scratch PDF holding the number overlays
    # Pages per output file; 0 means "do not split". Set to a positive k to
    # start a new output file every k pages.
    batch = 0
    output = PdfWriter()
    try:
        with open(path, 'rb') as src:
            pdf = PdfReader(src)
            n = len(pdf.pages)
            if batch == 0:
                # A negative modulus makes `p % batch` non-zero for every
                # 0 < p < n, so the split branch below never fires.
                batch = -n
            createPagePdf(n, tmp)
            # Create the directory where the outputs are actually written
            # (previously it was created in the current working directory,
            # which failed for inputs located elsewhere).
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)
            with open(tmp, 'rb') as ftmp:
                numberPdf = PdfReader(ftmp)
                for p in range(n):
                    if not p % batch and p:
                        # A full batch has been collected: flush it and
                        # start a fresh writer.
                        with open(_partPath(p // batch), 'wb') as dst:
                            output.write(dst)
                        output = PdfWriter()
                    print('page: %d of %d' % (p, n))
                    page = pdf.pages[p]
                    page.merge_page(numberPdf.pages[p])
                    output.add_page(page)
                # Write whatever is left. `p` is the last loop index; the
                # guard ensures the loop ran at least once. Uses the public
                # `pages` property rather than the private `_pages` list.
                if len(output.pages):
                    with open(_partPath(p // batch + 1), 'wb') as dst:
                        output.write(dst)
    finally:
        # Always remove the scratch file, even when processing failed.
        if os.path.exists(tmp):
            os.remove(tmp)
Hi there,
Thanks for sharing this code!
BTW, I tried it with a PDF of mine that I created by merging PDFs with pypdf2. The result is interesting: it adds the number at the bottom on some pages, and on the left and rotated (as if the page were in landscape) on other pages. Do you know what may be happening here? (I can send you the files directly if you want.)