Last active
November 28, 2023 09:14
-
-
Save documentprocessing/c84df94cb904e0cf22d9f17618cf9493 to your computer and use it in GitHub Desktop.
Read and update the metadata of PDF files in Python using the pypdf library. See https://products.documentprocessing.com/metadata/python/pypdf/ for more details.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example script: print selected metadata fields of a PDF file.

# Import the PdfReader class from the pypdf library
from pypdf import PdfReader

# Create a PdfReader object and load the input PDF file
reader = PdfReader("meta-pdf.pdf")

# Read the document metadata. This can be None when the PDF carries no
# document information dictionary, so it must be checked before use.
meta = reader.metadata

if meta is None:
    # Without this guard the attribute accesses below would raise
    # AttributeError on a PDF that has no metadata.
    print("No metadata found in the PDF.")
else:
    # Print the metadata fields of interest
    print(meta.author)
    print(meta.producer)
    print(meta.subject)
    print(meta.title)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example script: copy every page of a PDF into a new file and attach
# fresh document metadata to the copy.

# Import the datetime class used to build the metadata timestamps
from datetime import datetime

# Import the PdfReader and PdfWriter classes from the pypdf library
from pypdf import PdfReader, PdfWriter

# Create a PdfReader object and load the input PDF file
reader = PdfReader("documentprocessing.pdf")

# Create a new PDF writer object
writer = PdfWriter()

# Add all pages from the input PDF to the new writer
for page in reader.pages:
    writer.add_page(page)

# Build the timestamp in the PDF date format: D:YYYYMMDDHHmmSS+HH'mm'
# Use a timezone-aware local time so the UTC offset written to the file
# matches the clock value instead of relying on a hard-coded offset.
now = datetime.now().astimezone()
raw_offset = now.strftime("%z")  # e.g. "-0500" or "+0530"
# Re-shape "+HHMM" into the "+HH'mm'" form used by PDF date strings
utc_offset = f"{raw_offset[:3]}'{raw_offset[3:5]}'"
timestamp = now.strftime("D:%Y%m%d%H%M%S") + utc_offset

# Write the new metadata to the PDF
writer.add_metadata(
    {
        "/Author": "Documentprocessing",  # Author information
        "/Producer": "Microsoft Word",  # Software used to produce the PDF
        "/Title": "Title",  # Document title
        "/Subject": "Subject",  # Document subject
        "/Keywords": "Keywords",  # Keywords associated with the document
        "/CreationDate": timestamp,  # Date and time the document was created
        "/ModDate": timestamp,  # Date and time the document was last modified
        "/Creator": "Creator",  # Application that created the original document
    }
)

# Save the new PDF to a file
with open("meta-pdf.pdf", "wb") as f:
    writer.write(f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment