All libraries used are free and open source.
/*
See https://pdfbox.apache.org
See https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox for released artifacts
*/
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
//...
// Load the source PDF. try-with-resources closes the document when the block
// exits, including when cleaning or saving throws, so its resources are not leaked.
try (PDDocument document = PDDocument.load(new File("Test.pdf"))) {
    // Replace the document information dictionary with a new, empty one.
    document.setDocumentInformation(new PDDocumentInformation());
    // Point the catalog's metadata entry at a new, empty metadata stream.
    document.getDocumentCatalog().setMetadata(new PDMetadata(document));
    // Write the cleaned document to a new file, leaving the source file untouched.
    document.save("TestCleaned.pdf");
}
/*
See http://www.pdfsharp.net/PDFsharpOverview.ashx
Install the NuGet package via "Install-Package PdfSharp"
*/
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;
//...
// Open the PDF for modification. The using statement disposes the document when the
// block exits, including when cleaning or saving throws.
using (PdfDocument document = PdfReader.Open(@"C:\Test.pdf", PdfDocumentOpenMode.Modify))
{
    // Point the catalog's "/Metadata" entry at a reference to a new, empty dictionary.
    PdfDictionary emptyMetadata = new PdfDictionary();
    PdfReference emptyMetadataRef = new PdfReference(emptyMetadata);
    document.Internals.Catalog.Elements.SetReference("/Metadata", emptyMetadataRef);

    // Remove every entry from the document information dictionary.
    document.Info.Elements.Clear();

    // See https://github.com/empira/PDFsharp/blob/master/src/PdfSharp/Pdf/PdfDocument.cs#L452
    // The library adds its own product info as "Creator" when that field is empty and,
    // unfortunately, it adds its product info as "Producer" in all cases.
    // So this is not a perfect clean, but it removes all of the document's initial metadata
    // and overrides the "real" producer of the initial PDF document.
    document.Info.Creator = "NA";
    document.Save(@"C:\TestCleaned.pdf");
}
Hi,
However, this does not remove XMP metadata such as the following:
<xmpMM:Manifest>
<rdf:Seq>
<rdf:li rdf:parseType="Resource">
<stMfs:linkForm>EmbedByReference</stMfs:linkForm>
<stMfs:reference rdf:parseType="Resource">
<stRef:filePath>/Users/.../filename.psd</stRef:filePath>
</stMfs:reference>
</rdf:li>
</rdf:Seq>
</xmpMM:Manifest>