All libraries used are free and open source.
/*
See https://pdfbox.apache.org
See https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox for released artifacts
*/
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
//...
// Load the source PDF. try-with-resources guarantees the document is closed
// (PDDocument is Closeable) even when one of the calls below throws.
try (PDDocument document = PDDocument.load(new File("Test.pdf"))) {
    // Replace the document information dictionary with a freshly created, empty one.
    PDDocumentInformation infoEmpty = new PDDocumentInformation();
    document.setDocumentInformation(infoEmpty);

    // Replace the catalog's metadata stream with a newly created, empty one.
    PDMetadata newMetadataEmpty = new PDMetadata(document);
    document.getDocumentCatalog().setMetadata(newMetadataEmpty);

    // Write the cleaned copy next to the original; the source file is not overwritten.
    document.save("TestCleaned.pdf");
}
/*
See http://www.pdfsharp.net/PDFsharpOverview.ashx
Install the NuGet package via "Install-Package PdfSharp"
*/
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;
//...
// Open the source PDF in Modify mode. The using block disposes the document
// even if Save throws.
using (PdfDocument document = PdfReader.Open(@"C:\Test.pdf", PdfDocumentOpenMode.Modify))
{
    // Remove the catalog's /Metadata entry outright instead of pointing it at a
    // reference to a dictionary that was never added to the document.
    document.Internals.Catalog.Elements.Remove("/Metadata");

    // Wipe every entry of the document information dictionary.
    document.Info.Elements.Clear();

    // See https://github.com/empira/PDFsharp/blob/master/src/PdfSharp/Pdf/PdfDocument.cs#L452
    // The library adds its own product info when "Creator" is empty and,
    // unfortunately, always writes its product info as "Producer".
    // So this is not a perfect clean, but it removes all the initial metadata of the
    // document and overrides the "real" producer of the initial PDF document.
    document.Info.Creator = "NA";

    // Write the cleaned copy to a new file; the source file is not overwritten.
    document.Save(@"C:\TestCleaned.pdf");
}
Yes, it's not removing the XMP metadata.