Skip to content

Instantly share code, notes, and snippets.

@JoelGeraci-Datalogics
Last active August 29, 2015 14:22
Show Gist options
  • Save JoelGeraci-Datalogics/88dc2add2296b2d2ddb5 to your computer and use it in GitHub Desktop.
Save JoelGeraci-Datalogics/88dc2add2296b2d2ddb5 to your computer and use it in GitHub Desktop.
Creating PDF Portfolios from ZIP files using the Datalogics PDF Java Toolkit
package pdfjt.cookbook.combine;
/*
* ****************************************************************************
*
* Copyright 2009-2012 Adobe Systems Incorporated. All Rights Reserved.
* Portions Copyright 2012-2014 Datalogics Incorporated.
*
* NOTICE: Datalogics and Adobe permit you to use, modify, and distribute
* this file in accordance with the terms of the license agreement
* accompanying it. If you have received this file from a source other
* than Adobe or Datalogics, then your use, modification, or distribution of it
* requires the prior written permission of Adobe or Datalogics.
*
* ***************************************************************************
*/
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.internal.io.LazyRandomAccessFileByteReader;
import com.adobe.pdfjt.core.cos.CosDictionary;
import com.adobe.pdfjt.core.cos.CosObject;
import com.adobe.pdfjt.core.types.ASDate;
import com.adobe.pdfjt.core.types.ASName;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFEmbeddedFile;
import com.adobe.pdfjt.pdf.document.PDFEmbeddedFileInfo;
import com.adobe.pdfjt.pdf.document.PDFFileSpecification;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
import com.adobe.pdfjt.pdf.document.PDFVersion;
import com.adobe.pdfjt.pdf.filters.PDFFilterFlate;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionField;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionFieldType;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionFolder;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionItem;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionItemData;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionSchema;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionSort;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionUtil;
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFPortableCollection;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import pdfjt.util.SampleFileServices;
public class CreatePDFPortfolioFromZIP {
private static final String coverSheetPath = "cookbook/Collections/coversheet.pdf";
/*
* Remove the .zip extension if you want to run this sample using a folder
* full of files rather than a .zip file.
*
* Download input files from http://dev.datalogics.com/downloads/Collections.zip
*/
private static final String inputFiles = "cookbook/Collections/CollectionFiles.zip";
private static final String outputDir = "cookbook/output/Collection/";
private static PDFDocument coverSheet = null;
private static PDFPortableCollection pdfPortableCollection = null;
private static PDFCollectionSchema pdfCollectionSchema = null;
/**
* @param args
*/
public static void main(String[] args) throws Exception{
try {
File file = new File(coverSheetPath);
FileInputStream fis = new FileInputStream(file);
ByteReader byteReader = new InputStreamByteReader(fis);
/*
* A PDF Portfolio or "Portable Collection" consists of a base PDF
* file or "Coversheet" with attachments and optionally other
* dictionaries that describe how to display those attachments so we
* need to start with a new or existing PDF file.
*/
coverSheet = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
/*
* Then we need to add the "collection dictionary" so we can add
* attachments and associate metadata with them.
*/
pdfPortableCollection = PDFPortableCollection.newInstance(coverSheet);
pdfPortableCollection.setRootFolder("Root");
coverSheet.requireCatalog().setCollection(pdfPortableCollection);
/*
* Then we add a "schema" to the collection. We want to sort the
* files based on their filename so we set that up here.
*/
pdfCollectionSchema = PDFCollectionSchema.newInstance(coverSheet);
PDFCollectionField field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.file, "Name");
field.setOrder(0);
pdfCollectionSchema.set(ASName.create("FileName"), field);
PDFCollectionSort sort = PDFCollectionSort.newInstance(coverSheet, new ASName[]{ASName.create("FileName"), ASName.create("Datalogics:Title")});
pdfPortableCollection.setSort(sort);
/*
* Now we can begin adding files. If the input is a .zip file, we
* expand that into a temporary folder first. Otherwise we use the
* folder full of files as input
*/
String attachmentsFolder;
if (inputFiles.endsWith(".zip")) {
unZip(inputFiles);
attachmentsFolder = outputDir+"/temp";
}
else {
attachmentsFolder = inputFiles;
}
// Add the files and folders by recursing the root folder
addFilesAndFolders(attachmentsFolder, pdfPortableCollection.getRootFolder());
//Set the schema for the collection
pdfPortableCollection.setSchema(pdfCollectionSchema);
//collectionDict.setInitialDocumentName(new ASString(new File(pdfFile1).getAbsolutePath()));
coverSheet.requireCatalog().setCollection(pdfPortableCollection);
//
// ------------ Project complete. Save the new collection ---------
ByteWriter outputWriter = SampleFileServices.getRAFByteWriter(
outputDir + "NewCollection.pdf");
PDFSaveFullOptions fullOptions = PDFSaveFullOptions.newInstance(PDFVersion.v1_7);
coverSheet.save(outputWriter, fullOptions);
} finally {
coverSheet.close();
}
System.out.println("Done!");
}
/**
* Decompresses a zip file to a temporary folder in the output directory
*
* @param zipFile the path to the .zip file to be decompressed
* @throws ZipException
* @throws IOException
*/
@SuppressWarnings("resource")
static public void unZip(String zipFile) throws ZipException, IOException
{
int BUFFER = 2048;
File file = new File(zipFile);
ZipFile zip = new ZipFile(file);
String newPath = zipFile.substring(0, zipFile.length() - 4);
new File(newPath).mkdir();
Enumeration<? extends ZipEntry> zipFileEntries = zip.entries();
while (zipFileEntries.hasMoreElements())
{
ZipEntry entry = (ZipEntry) zipFileEntries.nextElement();
String currentEntry = entry.getName();
File destFile = new File(outputDir+"/temp", currentEntry);
File destinationParent = destFile.getParentFile();
destinationParent.mkdirs();
if (!entry.isDirectory())
{
BufferedInputStream is = new BufferedInputStream(zip
.getInputStream(entry));
int currentByte;
byte data[] = new byte[BUFFER];
FileOutputStream fos = new FileOutputStream(destFile);
BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER);
while ((currentByte = is.read(data, 0, BUFFER)) != -1) {
dest.write(data, 0, currentByte);
}
dest.flush();
dest.close();
is.close();
}
if (currentEntry.endsWith(".zip"))
{
unZip(destFile.getAbsolutePath());
}
}
}
/**
* Recursively look for files and folders from the initial input folder path to add to the PDF Collection
*
* @param path The path of the file to add
* @param currentFolder The collection object to add the path to
* @throws Exception
*/
public static void addFilesAndFolders(String path, PDFCollectionFolder currentFolder) throws Exception {
File root = new File(path);
File[] list = root.listFiles();
if (list == null) return;
for (File f : list) {
if (f.isDirectory()) {
// If the File is a folder, create the folder within it's parent
PDFCollectionFolder newFolder = PDFCollectionFolder.newInstance(currentFolder, f.getName());
// Create the Collection Item and set it's key/value pairs so that a folder name appears in the Acrobat UI
PDFCollectionItem pdfCollectionItem = PDFCollectionItem.newInstance(coverSheet);
PDFCollectionItemData titleData = PDFCollectionItemData.newInstance(coverSheet, f.getName(), ASName.create("Datalogics:Title"));
pdfCollectionItem.setData(ASName.create("Datalogics:Title"), titleData);
newFolder.setCollectionItem(pdfCollectionItem);
// Recourse
addFilesAndFolders(f.getAbsolutePath(), newFolder );
}
else {
attachFile(f.getAbsolutePath(), currentFolder);
}
}
}
/**
* Adds a file to the collection and the file metadata to the schema
*
* @param path The path to the file to add
* @param currentFolder The folder in the collection that the file is added to
* @throws Exception
*/
private static void attachFile(String path, PDFCollectionFolder currentFolder) throws Exception
{
File attFile = new File(path);
ByteReader attByteReader = new LazyRandomAccessFileByteReader(attFile);
PDFFileSpecification pdfFileSpecification;
try {
// First, follow the standard setup for embedded files by creating the appropriate data.
ASDate modDate = new ASDate(new Date(attFile.lastModified()));
PDFEmbeddedFile fileToEmbedd = PDFEmbeddedFile.newInstance(coverSheet,
PDFEmbeddedFileInfo.newInstance(coverSheet,
(int)attByteReader.length(),
modDate,
modDate),
attByteReader);
fileToEmbedd.setFilter(PDFFilterFlate.newInstance(coverSheet, null));
pdfFileSpecification = PDFFileSpecification.newInstance(coverSheet, Paths.get(attFile.getName()).getFileName().toString().getBytes(), fileToEmbedd);
PDFCollectionUtil.addFileToPDFCollectionFolder(Paths.get(attFile.getName()).getFileName().toString(), pdfFileSpecification, currentFolder);
// Get metadata information from PDF and add it to the schema so
// that the Portfolio can display it. If the file isn't a PDF, just
// add a title field based on the filename.
PDFCollectionItem pdfCollectionItem = PDFCollectionItem.newInstance(coverSheet);
if (path.endsWith(".pdf")) {
File file = new File(path);
FileInputStream fis = new FileInputStream(file);
ByteReader byteReader = new InputStreamByteReader(fis);
PDFDocument attachment = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
// The DocumentInfo ditionary holds the PDF metadata
CosDictionary infoCosDict = attachment.getDocumentInfo().getCosDictionary();
Iterator<ASName> iterator = infoCosDict.keyIterator();
while (iterator.hasNext()){
ASName keyName = iterator.next();
if (infoCosDict.get(keyName).getType() == CosObject.t_String) {
//Different Portfolio layouts can add fields to the schema so we differentiate ours by prefixing the field name with "Datalogics:"
ASName key = ASName.create("Datalogics:"+keyName.asString());
String value = infoCosDict.get(keyName).textValue();
PDFCollectionItemData pdfCollectionItemData = PDFCollectionItemData.newInstance(coverSheet, value, key);
pdfCollectionItem.setData(key, pdfCollectionItemData);
if (!pdfCollectionSchema.containsKey(keyName)) {
PDFCollectionField field = null;
//So that the date information appears correctly, we need to tell the schema to display the string value as a date.
if (keyName.toString().contains("Date")) {
field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.date, keyName.asString());
}
else {
field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.text, keyName.asString());
}
pdfCollectionSchema.set(key, field);
}
}
}
pdfFileSpecification.setCollectionItem(pdfCollectionItem);
}
else {
PDFCollectionItemData titleData = PDFCollectionItemData.newInstance(coverSheet, attFile.getName(), ASName.create("Datalogics:Title"));
pdfCollectionItem.setData(ASName.create("Datalogics:Title"), titleData);
pdfFileSpecification.setCollectionItem(pdfCollectionItem);
}
System.out.println("Added: " + attFile.getName());
} catch (Exception e) {
attByteReader.close();
throw e;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment