Skip to content

Instantly share code, notes, and snippets.

@Macuyiko
Last active August 29, 2015 14:15
Show Gist options
  • Save Macuyiko/a170686e59a919ef49e6 to your computer and use it in GitHub Desktop.
Save Macuyiko/a170686e59a919ef49e6 to your computer and use it in GitHub Desktop.
import java.io.FileNotFoundException;
import java.util.Date;
import org.pdfclown.documents.Document;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.ContentScanner;
import org.pdfclown.documents.contents.Contents;
import org.pdfclown.documents.contents.composition.PrimitiveComposer;
import org.pdfclown.documents.contents.objects.ContentObject;
import org.pdfclown.documents.contents.objects.Path;
import org.pdfclown.documents.contents.objects.SetFillColor;
import org.pdfclown.documents.contents.objects.Text;
import org.pdfclown.documents.interaction.viewer.ViewerPreferences;
import org.pdfclown.documents.interchange.metadata.Information;
import org.pdfclown.files.File;
import org.pdfclown.files.SerializationModeEnum;
import org.pdfclown.objects.PdfInteger;
import org.pdfclown.documents.contents.colorSpaces.DeviceRGBColor;
/**
 * Command-line tool that rewrites fill colours in a PDF using the PDF Clown library.
 *
 * <p>Every page's content stream is walked recursively. For each {@code Path} object a
 * white (1,1,1) RGB fill colour is set through a composer bound to the scanner, and for
 * each {@code Text} object a black (0,0,0) one. {@code SetFillColor} operations found
 * directly inside a {@code Text} or {@code Path} parent have their components overwritten
 * with 0 0 0 or 1 1 1 respectively. The result is saved to a second file.
 *
 * <p>Usage: {@code PDFCleaner <input.pdf> <output.pdf> [debug]}
 */
public class PDFCleaner {
    /**
     * When {@code true}, every visited content object is dumped to {@code System.err}.
     * {@link #main(String[])} sets it according to whether a third argument was supplied.
     */
    public static boolean DEBUG = false;

    /**
     * Entry point: processes every page of the input PDF and saves the result.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path; the mere
     *             presence of any further argument switches debug output on
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IllegalStateException if the processed document cannot be saved
     */
    public static void main(String[] args) throws FileNotFoundException {
        if (args == null || args.length < 2) {
            // Fail with a usable message instead of a bare ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "Usage: PDFCleaner <input.pdf> <output.pdf> [debug]");
        }
        String inputPath = args[0];
        String outputPath = args[1];
        DEBUG = args.length > 2;

        File file = new File(inputPath);
        try {
            Document document = file.getDocument();
            int nrPages = document.getNumberOfPages();
            for (int p = 0; p < nrPages; p++) {
                System.out.println("Processing page: "+p);
                Page page = document.getPages().get(p);
                Contents pageContents = page.getContents();
                ContentScanner scanner = new ContentScanner(pageContents);
                process(scanner, page, 1);
                // Write the modified content objects back into the page's stream.
                pageContents.flush();
            }
            System.out.println("Done, now saving");
            serialize(file, outputPath);
        } finally {
            // Release the handle on the input file whether or not processing succeeded.
            try {
                file.close();
            } catch (java.io.IOException e) {
                // The work is already done (or has already failed with its own
                // exception); a close failure must not mask that outcome.
                System.err.println("Warning: could not close " + inputPath + ": " + e.getMessage());
            }
        }
    }

    /**
     * Recursively walks one scanner level and adjusts fill colours.
     *
     * <p>For each object the child level is processed first, then the object itself is
     * handled. This order is kept deliberately: the composer and the scanner share
     * position state, so reordering could change where operations end up.
     *
     * @param level scanner positioned before the first object of the level; {@code null}
     *              (no child level) makes the call a no-op
     * @param page  page being processed (currently only passed down the recursion)
     * @param l     nesting depth, used as a prefix in debug output
     */
    private static void process(ContentScanner level, Page page, int l) {
        if (level == null) {
            return;
        }
        PrimitiveComposer builder = new PrimitiveComposer(level);
        while (level.moveNext()) {
            ContentObject object = level.getCurrent();
            if (DEBUG) {
                System.err.println(l+": "+"ContentObject: "+object.getClass()+" "+object.toString());
            }
            process(level.getChildLevel(), page, l+1);
            if (object instanceof Path) {
                builder.setFillColor(new DeviceRGBColor(1,1,1));
            } else if (object instanceof Text) {
                builder.setFillColor(new DeviceRGBColor(0,0,0));
            } else if (object instanceof SetFillColor) {
                // Only fill-colour operations nested directly in Text or Path are rewritten;
                // Text is tested first, as in the original branch order.
                if (level.getParent() instanceof Text) {
                    overwriteFillColor((SetFillColor) object, 0);
                } else if (level.getParent() instanceof Path) {
                    overwriteFillColor((SetFillColor) object, 1);
                }
            }
        }
    }

    /**
     * Replaces the components of a fill-colour operation with three identical integers.
     *
     * @param operation operation whose components are overwritten in place
     * @param value     value written for each of the three components
     */
    private static void overwriteFillColor(SetFillColor operation, int value) {
        operation.getComponents().clear();
        for (int i = 0; i < 3; i++) {
            operation.getComponents().add(new PdfInteger(value));
        }
    }

    /**
     * Installs fresh viewer preferences (document title shown in the viewer) and stamps
     * the document information with the current creation date and this tool's name.
     *
     * @param document document whose settings are modified
     */
    private static void applyDocumentSettings(Document document) {
        ViewerPreferences view = new ViewerPreferences(document);
        document.setViewerPreferences(view);
        view.setDisplayDocTitle(true);
        Information info = document.getInformation();
        info.setCreationDate(new Date());
        info.setCreator("PDF Cleaner");
    }

    /**
     * Applies the document settings and saves the file using standard serialization.
     *
     * @param file     PDF file to save
     * @param fileName destination path
     * @return the path of the written file
     * @throws IllegalStateException if saving fails; the original exception is the cause
     */
    private static String serialize(File file, String fileName) {
        applyDocumentSettings(file.getDocument());
        SerializationModeEnum serializationMode = SerializationModeEnum.Standard;
        java.io.File outputFile = new java.io.File(fileName);
        try {
            file.save(outputFile, serializationMode);
        } catch (Exception e) {
            // Saving is the whole point of the tool: surface the failure to the caller
            // instead of printing a stack trace and reporting success.
            throw new IllegalStateException("Failed to save PDF to " + outputFile.getPath(), e);
        }
        return outputFile.getPath();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment