Code sample

All libraries used are free and open source.

Java

/* 
  See https://pdfbox.apache.org 
  See https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox for released artifacts
*/
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
//...
// Load the source PDF. try-with-resources closes the document (and the
// resources it holds) even if one of the calls below throws.
try (PDDocument document = PDDocument.load(new File("Test.pdf"))) {
    // Replace the document information dictionary with a new, empty one.
    document.setDocumentInformation(new PDDocumentInformation());
    // Replace the catalog's metadata stream with a new, empty one.
    document.getDocumentCatalog().setMetadata(new PDMetadata(document));
    // Write the cleaned document to a separate file; the input is left untouched.
    document.save("TestCleaned.pdf");
}

C#

/* 
   See http://www.pdfsharp.net/PDFsharpOverview.ashx 
   Install the NuGet package via "Install-Package PdfSharp"
*/
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;
//...
// PdfDocument is IDisposable: the using block releases it even if Save throws.
using (PdfDocument document = PdfReader.Open(@"C:\Test.pdf", PdfDocumentOpenMode.Modify))
{
    // Point the catalog's "/Metadata" entry at a new, empty dictionary.
    PdfDictionary metadata = new PdfDictionary();
    PdfReference pdfRef = new PdfReference(metadata);
    document.Internals.Catalog.Elements.SetReference("/Metadata", pdfRef);

    // Remove every entry from the document information dictionary.
    document.Info.Elements.Clear();

    // See https://github.com/empira/PDFsharp/blob/master/src/PdfSharp/Pdf/PdfDocument.cs#L452
    // The library adds its own product info when "Creator" is empty and,
    // unfortunately, it always writes its product info as "Producer".
    // So this is not a perfect cleanup, but it removes all the initial metadata of the
    // document and overrides the "real" producer of the initial PDF document.
    document.Info.Creator = "NA";

    document.Save(@"C:\TestCleaned.pdf");
}

righettod/pdf_metadata_cleanup.md

Code sample

Java

C#

Embedded1993 commented May 23, 2023

righettod commented May 23, 2023