Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save documentprocessing/1c275226ad45f4369540307d589b78a3 to your computer and use it in GitHub Desktop.

Select an option

Save documentprocessing/1c275226ad45f4369540307d589b78a3 to your computer and use it in GitHub Desktop.
Read and edit PDF metadata (standard and XML metadata) in Python using the PyMuPDF library. Check https://products.documentprocessing.com/metadata/python/pymupdf/ for more details.
# Set the standard (document-info) metadata of a PDF with PyMuPDF and write
# the modified document to a new file.
import fitz

# Metadata values to store in the document.
# NOTE(review): per the PyMuPDF docs, set_metadata() appears to clear any
# standard key that is not supplied here. To change only a few fields, start
# from a copy of doc.metadata instead -- confirm against the installed version.
new_metadata = {
    'author': 'Document Processing',
    'title': 'Test Document',
    'subject': 'Test document to review document processing libraries',
}

# Open through a context manager so the document is closed even when
# set_metadata() or save() raises.
with fitz.open('documentprocessing.pdf') as doc:
    # Apply the new metadata to the in-memory document
    doc.set_metadata(new_metadata)
    # Write the result under a different file name than the one opened
    doc.save("doc.pdf")
# Edit the XML metadata of a PDF with PyMuPDF by plain text substitution and
# write the modified document to a new file.
import fitz

# Open through a context manager so the document is closed even when one of
# the calls below raises.
with fitz.open("documentprocessing.pdf") as doc:
    # Get the current metadata as an XML string
    xml_metadata = doc.get_xml_metadata()

    # Plain text substitution: every exact occurrence of the first fragment
    # is rewritten, wherever it appears in the XML. If the fragment is not
    # present, the XML is written back unchanged.
    # NOTE(review): this presumably targets the creator entry of this
    # particular sample file -- verify against the actual XML.
    updated_xml_metadata = xml_metadata.replace(
        '<rdf:li>Document</rdf:li>',
        '<rdf:li>Document Processing</rdf:li>',
    )

    # Store the updated XML metadata in the document
    doc.set_xml_metadata(updated_xml_metadata)
    # Write the result under a different file name than the one opened
    doc.save("data.pdf")
# Pretty-print the standard metadata of a PDF using PyMuPDF.
from pprint import pprint

import fitz

# Open through a context manager so the document is closed even when
# reading or printing the metadata raises.
with fitz.open('doc.pdf') as doc:
    # doc.metadata is pretty-printed as-is
    pprint(doc.metadata)
# Print the XML metadata of a PDF using PyMuPDF.
import fitz

# Open through a context manager so the document is closed even when
# reading or printing the metadata raises.
with fitz.open("data.pdf") as doc:
    # Print the XML metadata exactly as returned by the library
    print(doc.get_xml_metadata())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment