/*
 * Copyright Datalogics, Inc. 2015
 */

package pdfjt.cookbook.document;

import com.adobe.fontengine.font.Font;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.graphics.font.PDFFont;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFAnnotationEnum;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFAnnotationRedaction;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.ap.AppearanceService;
import com.adobe.pdfjt.services.ap.spi.APContext;
import com.adobe.pdfjt.services.ap.spi.APResources;
import com.adobe.pdfjt.services.readingorder.ReadingOrderTextExtractor;
import com.adobe.pdfjt.services.redaction.RedactionOptions;
import com.adobe.pdfjt.services.redaction.RedactionService;
import com.adobe.pdfjt.services.textextraction.Word;
import com.adobe.pdfjt.services.textextraction.WordsIterator;
import com.datalogics.pdf.document.FontSetLoader;
import com.datalogics.pdf.samples.util.IoUtils;

import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.regex.Pattern;

/**
 * Searches a PDF for phone numbers and redacts them.
 *
 * <p>The sample downloads an input PDF, walks every word reported by the reading-order text
 * extractor, adds a Redaction annotation over each word that looks like a phone number,
 * generates the annotation appearances, and finally applies the redactions while writing the
 * result to {@code SearchAndRedactUsingRegEx_Output.pdf}.
 */
public class SearchAndRedactUsingRegEx {

    /** Location of the sample input PDF that is downloaded and redacted. */
    private static final String INPUT_PDF_URL = "http://dev.datalogics.com/cookbook/document/SearchAndRedactUsingRegEx_Input.pdf";

    /** User-Agent header sent with the download request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11";

    /** Path of the redacted PDF written by this sample. */
    private static final String OUTPUT_PDF_PATH = "SearchAndRedactUsingRegEx_Output.pdf";

    /**
     * Phone number pattern: three digits, three digits and four digits, with each group separated
     * by a single hyphen, period or whitespace character (for example {@code 555-123-4567}).
     * Compiled once so the expression is not recompiled for every word.
     */
    private static final Pattern PHONE_NUMBER = Pattern.compile("\\d{3}[-\\.\\s]\\d{3}[-\\.\\s]\\d{4}");

    /** Number of coordinates (four x/y pairs) that describe one quadrilateral. */
    private static final int COORDS_PER_QUAD = 8;

    /**
     * Runs the sample: downloads the input PDF, marks phone numbers for redaction and writes the
     * redacted document.
     *
     * @param args command line arguments (unused)
     * @throws Exception if the download, the PDF processing or the output fails
     */
    public static void main(String[] args) throws Exception {

        // First read in the PDF file
        URLConnection connection = new URL(INPUT_PDF_URL).openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.connect();

        InputStream inputStream = connection.getInputStream();
        try {
            ByteReader byteReader = new InputStreamByteReader(inputStream);
            PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
            try {
                markPhoneNumbersForRedaction(pdfDocument);
                generateRedactionAppearances(pdfDocument);

                // Apply the redactions
                // NOTE(review): the ByteWriter is not closed here, as in the original sample;
                // confirm whether applyRedaction closes it.
                RedactionService.applyRedaction(pdfDocument,
                        new RedactionOptions(null),
                        IoUtils.newByteWriter(IoUtils.createUrlFromPath(OUTPUT_PDF_PATH)));
            } finally {
                // Release the document even when one of the steps above fails.
                pdfDocument.close();
            }
        } finally {
            // Release the network stream once the document is no longer in use.
            inputStream.close();
        }

        System.out.println("Done!");
    }

    /**
     * Adds a Redaction annotation over every word in the document that matches
     * {@link #PHONE_NUMBER}, printing each match to standard output.
     *
     * <p>NOTE(review): every annotation is added to page 0; the sample input is assumed to be a
     * single-page document.
     *
     * @param pdfDocument the document to search and annotate
     * @throws Exception if text extraction or annotation creation fails
     */
    private static void markPhoneNumbersForRedaction(PDFDocument pdfDocument) throws Exception {
        // Get the first (and only) page in the file. We'll need this object in order to add
        // annotations to it.
        PDFPage pdfPageOne = pdfDocument.requirePages().getPage(0);

        ReadingOrderTextExtractor textExtractor = ReadingOrderTextExtractor.newInstance(pdfDocument,
                FontSetLoader.newInstance().getFontSet());
        WordsIterator wordsIterator = textExtractor.getWordsIterator();

        System.out.println("Phone Numbers Found:");
        while (wordsIterator.hasNext()) {
            Word word = wordsIterator.next();
            String text = word.toString();
            // matches() requires the whole word to be a phone number, as String.matches did.
            if (!PHONE_NUMBER.matcher(text).matches()) {
                continue;
            }
            System.out.println(text);

            /*
             * Create a new Redaction annotation and use the location
             * properties of the word to set the properties of the
             * annotation.
             */
            PDFAnnotationRedaction pdfAnnotationRedaction = PDFAnnotationRedaction.newInstance(pdfDocument);
            pdfAnnotationRedaction.setQuadPoints(wordQuadsToAnnotQuads(word));
            pdfAnnotationRedaction.setRect(pdfAnnotationRedaction.getRedactionAreaBBox());
            pdfAnnotationRedaction.setColor(new double[] { 1, 0, 0 }); // red
            pdfAnnotationRedaction.setInteriorColor(new double[] { 0, 0, 0 }); // black
            pdfPageOne.addAnnotation(pdfAnnotationRedaction);
        }
    }

    /**
     * Generates appearances for the Redaction annotations in the document, using the font set and
     * locale from the document's own options.
     *
     * @param pdfDocument the document whose Redaction annotations need appearances
     * @throws Exception if appearance generation fails
     */
    private static void generateRedactionAppearances(PDFDocument pdfDocument) throws Exception {
        APResources apResources = new APResources(pdfDocument.getCosDocument().getOptions().getFontSet(),
                pdfDocument.getCosDocument().getOptions().getDocLocale(),
                new HashMap<Font, PDFFont>());
        APContext apContext = new APContext(apResources, true, null);
        // Restrict processing to Redaction annotations only.
        apContext.setAnnotationsToBeProcessed(EnumSet.of(PDFAnnotationEnum.Redact));
        AppearanceService.generateAppearances(pdfDocument, apContext, null);
    }

    /**
     * Converts the bounding quads of a word into the flat coordinate array used for an
     * annotation's quad points.
     *
     * <p>Every bounding quad of the word contributes eight values, in the order
     * {@code p1.x, p1.y, p2.x, p2.y, p3.x, p3.y, p4.x, p4.y}, so a word with several bounding
     * quads is covered completely rather than only by its first quad.
     *
     * @param word the word whose bounding quads are converted
     * @return an array of {@code 8 * n} coordinates, where {@code n} is the number of bounding
     *         quads of the word
     * @throws IllegalArgumentException if the word has no bounding quads
     * @throws Exception if the bounding quads cannot be read
     */
    public static double[] wordQuadsToAnnotQuads(Word word) throws Exception {
        int quadCount = word.getBoundingQuads().size();
        if (quadCount == 0) {
            throw new IllegalArgumentException("Word has no bounding quads: " + word);
        }

        double[] quadPoints = new double[COORDS_PER_QUAD * quadCount];
        for (int i = 0; i < quadCount; i++) {
            int base = COORDS_PER_QUAD * i;
            quadPoints[base] = word.getBoundingQuads().get(i).p1().x();
            quadPoints[base + 1] = word.getBoundingQuads().get(i).p1().y();
            quadPoints[base + 2] = word.getBoundingQuads().get(i).p2().x();
            quadPoints[base + 3] = word.getBoundingQuads().get(i).p2().y();
            quadPoints[base + 4] = word.getBoundingQuads().get(i).p3().x();
            quadPoints[base + 5] = word.getBoundingQuads().get(i).p3().y();
            quadPoints[base + 6] = word.getBoundingQuads().get(i).p4().x();
            quadPoints[base + 7] = word.getBoundingQuads().get(i).p4().y();
        }
        return quadPoints;
    }

}