documentprocessing · December 7, 2023 17:39
diff --git a/edit-metadata-of-pdfs-using-pymupdf-library.py b/edit-metadata-of-pdfs-using-pymupdf-library.py
 # Import PyMuPDF library
 import fitz

 # Metadata fields to write into the document.
 # NOTE(review): per the PyMuPDF docs, keys omitted here may be cleared by
 # set_metadata(); start from a copy of doc.metadata to keep them - confirm.
 new_metadata = {
     'author': 'Document Processing',
     'title': 'Test Document',
     'subject': 'Test document to review document processing libraries',
 }

 # Open the source PDF as a context manager so it is closed even if
 # updating or saving raises.
 with fitz.open('documentprocessing.pdf') as doc:
     # Set the new metadata in the document
     doc.set_metadata(new_metadata)

     # Save the document with the changes under a separate file name
     doc.save("doc.pdf")
diff --git a/edit-xml-metadata-of-pdfs-using-pymupdf-library.py b/edit-xml-metadata-of-pdfs-using-pymupdf-library.py
 # Import the PyMuPDF library
 import fitz

 # Open the PDF as a context manager so it is closed even if a later
 # step raises.
 with fitz.open("documentprocessing.pdf") as doc:
     # Get the current metadata as XML
     xml_metadata = doc.get_xml_metadata()

     # Swap the <rdf:li> entry "Document" for "Document Processing".
     # replace() leaves the text unchanged when the element is absent, in
     # which case the XML is written back as-is.
     updated_xml_metadata = xml_metadata.replace(
         '<rdf:li>Document</rdf:li>',
         '<rdf:li>Document Processing</rdf:li>',
     )

     # Set the updated XML metadata in the PDF document
     doc.set_xml_metadata(updated_xml_metadata)

     # Save the changes to a new PDF file
     doc.save("data.pdf")
diff --git a/read-pdf-metadata-using-pymupdf-library.py b/read-pdf-metadata-using-pymupdf-library.py
 # Import PyMuPDF library
 import fitz

 # Import pprint module for pretty printing
 from pprint import pprint

 # Open the PDF as a context manager so it is closed even if reading
 # or printing the metadata raises.
 with fitz.open('doc.pdf') as doc:
     # Pretty print the metadata of the opened PDF
     pprint(doc.metadata)
diff --git a/read-xml-metadata-of-pdfs-using-pymupdf-library.py b/read-xml-metadata-of-pdfs-using-pymupdf-library.py
 # Import the PyMuPDF module
 import fitz

 # Open the PDF as a context manager so it is closed even if reading
 # or printing the XML metadata raises.
 with fitz.open("data.pdf") as doc:
     # Print the XML metadata of the PDF document
     print(doc.get_xml_metadata())
	# Import PyMuPDF library
	import fitz

	# Metadata fields to write into the document.
	# NOTE(review): per the PyMuPDF docs, keys omitted here may be cleared by
	# set_metadata(); start from a copy of doc.metadata to keep them - confirm.
	new_metadata = {
		'author': 'Document Processing',
		'title': 'Test Document',
		'subject': 'Test document to review document processing libraries',
	}

	# Open the source PDF as a context manager so it is closed even if
	# updating or saving raises.
	with fitz.open('documentprocessing.pdf') as doc:
		# Set the new metadata in the document
		doc.set_metadata(new_metadata)

		# Save the document with the changes under a separate file name
		doc.save("doc.pdf")
	# Import the PyMuPDF library
	import fitz

	# Open the PDF as a context manager so it is closed even if a later
	# step raises.
	with fitz.open("documentprocessing.pdf") as doc:
		# Get the current metadata as XML
		xml_metadata = doc.get_xml_metadata()

		# Swap the <rdf:li> entry "Document" for "Document Processing".
		# replace() leaves the text unchanged when the element is absent, in
		# which case the XML is written back as-is.
		updated_xml_metadata = xml_metadata.replace(
			'<rdf:li>Document</rdf:li>',
			'<rdf:li>Document Processing</rdf:li>',
		)

		# Set the updated XML metadata in the PDF document
		doc.set_xml_metadata(updated_xml_metadata)

		# Save the changes to a new PDF file
		doc.save("data.pdf")
	# Import PyMuPDF library
	import fitz

	# Import pprint module for pretty printing
	from pprint import pprint

	# Open the PDF as a context manager so it is closed even if reading
	# or printing the metadata raises.
	with fitz.open('doc.pdf') as doc:
		# Pretty print the metadata of the opened PDF
		pprint(doc.metadata)
	# Import the PyMuPDF module
	import fitz

	# Open the PDF as a context manager so it is closed even if reading
	# or printing the XML metadata raises.
	with fitz.open("data.pdf") as doc:
		# Print the XML metadata of the PDF document
		print(doc.get_xml_metadata())