Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save documentprocessing/c84df94cb904e0cf22d9f17618cf9493 to your computer and use it in GitHub Desktop.
Save documentprocessing/c84df94cb904e0cf22d9f17618cf9493 to your computer and use it in GitHub Desktop.
Read and update the metadata of PDFs in Python using the pypdf library. Check https://products.documentprocessing.com/metadata/python/pypdf/ for more details.
# Import the PdfReader class from the pypdf library
from pypdf import PdfReader
# Create a PdfReader object and load the input PDF file
reader = PdfReader("meta-pdf.pdf")
# Read the document information dictionary. pypdf documents this property as
# optional: it is None when the PDF carries no metadata, so guard before
# touching its attributes instead of crashing with AttributeError.
meta = reader.metadata
if meta is None:
    print("No metadata found")
else:
    # Print the metadata (an individual field may itself be None if unset)
    print(meta.author)
    print(meta.producer)
    print(meta.subject)
    print(meta.title)
# Import the PdfReader and PdfWriter classes from the pypdf library
from pypdf import PdfReader, PdfWriter
# Import the datetime module
from datetime import datetime
# Create a PdfReader object and load the input PDF file
reader = PdfReader("documentprocessing.pdf")
# Create a new PDF writer object using PdfWriter
writer = PdfWriter()
# Copy every page from the input PDF into the new writer
for page in reader.pages:
    writer.add_page(page)
# Build the timestamp in the PDF date format: D:YYYYMMDDHHmmSS followed by
# the offset from UTC written as +HH'mm' or -HH'mm'.
# The offset is derived from a timezone-aware local time so that it always
# matches the clock value; a hard-coded "-05'00'" is only correct on a
# machine that happens to be at UTC-5.
now = datetime.now().astimezone()
raw_offset = now.strftime("%z")  # e.g. "-0500"
utc_time = f"{raw_offset[:3]}'{raw_offset[3:5]}'"
# Current date and time formatted for metadata
time = now.strftime("D:%Y%m%d%H%M%S") + utc_time
# Write the new metadata to the PDF
writer.add_metadata(
    {
        "/Author": "Documentprocessing",  # Author information
        "/Producer": "Microsoft Word",  # Software used to produce the PDF
        "/Title": "Title",  # Document title
        "/Subject": "Subject",  # Document subject
        "/Keywords": "Keywords",  # Keywords associated with the document
        "/CreationDate": time,  # Date and time the document was created
        "/ModDate": time,  # Date and time the document was last modified
        "/Creator": "Creator",  # Application that created the original document
    }
)
# Save the new PDF to a file
with open("meta-pdf.pdf", "wb") as f:
    writer.write(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment