Last active
August 29, 2015 14:22
-
-
Save JoelGeraci-Datalogics/88dc2add2296b2d2ddb5 to your computer and use it in GitHub Desktop.
Creating PDF Portfolios from ZIP files using the Datalogics PDF Java Toolkit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package pdfjt.cookbook.combine; | |
/*
 * ****************************************************************************
 *
 * Copyright 2009-2012 Adobe Systems Incorporated. All Rights Reserved.
 * Portions Copyright 2012-2014 Datalogics Incorporated.
 *
 * NOTICE: Datalogics and Adobe permit you to use, modify, and distribute
 * this file in accordance with the terms of the license agreement
 * accompanying it. If you have received this file from a source other
 * than Adobe or Datalogics, then your use, modification, or distribution of it
 * requires the prior written permission of Adobe or Datalogics.
 *
 * ***************************************************************************
 */
import com.adobe.internal.io.ByteReader; | |
import com.adobe.internal.io.ByteWriter; | |
import com.adobe.internal.io.InputStreamByteReader; | |
import com.adobe.internal.io.LazyRandomAccessFileByteReader; | |
import com.adobe.pdfjt.core.cos.CosDictionary; | |
import com.adobe.pdfjt.core.cos.CosObject; | |
import com.adobe.pdfjt.core.types.ASDate; | |
import com.adobe.pdfjt.core.types.ASName; | |
import com.adobe.pdfjt.pdf.document.PDFDocument; | |
import com.adobe.pdfjt.pdf.document.PDFEmbeddedFile; | |
import com.adobe.pdfjt.pdf.document.PDFEmbeddedFileInfo; | |
import com.adobe.pdfjt.pdf.document.PDFFileSpecification; | |
import com.adobe.pdfjt.pdf.document.PDFOpenOptions; | |
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions; | |
import com.adobe.pdfjt.pdf.document.PDFVersion; | |
import com.adobe.pdfjt.pdf.filters.PDFFilterFlate; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionField; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionFieldType; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionFolder; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionItem; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionItemData; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionSchema; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionSort; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFCollectionUtil; | |
import com.adobe.pdfjt.pdf.interactive.navigation.collection.PDFPortableCollection; | |
import java.io.BufferedInputStream; | |
import java.io.BufferedOutputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.nio.file.Paths; | |
import java.util.Date; | |
import java.util.Enumeration; | |
import java.util.Iterator; | |
import java.util.zip.ZipEntry; | |
import java.util.zip.ZipException; | |
import java.util.zip.ZipFile; | |
import pdfjt.util.SampleFileServices; | |
public class CreatePDFPortfolioFromZIP { | |
private static final String coverSheetPath = "cookbook/Collections/coversheet.pdf"; | |
/* | |
* Remove the .zip extension if you want to run this sample using a folder | |
* full of files rather than a .zip file. | |
* | |
* Download input files from http://dev.datalogics.com/downloads/Collections.zip | |
*/ | |
private static final String inputFiles = "cookbook/Collections/CollectionFiles.zip"; | |
private static final String outputDir = "cookbook/output/Collection/"; | |
private static PDFDocument coverSheet = null; | |
private static PDFPortableCollection pdfPortableCollection = null; | |
private static PDFCollectionSchema pdfCollectionSchema = null; | |
/** | |
* @param args | |
*/ | |
public static void main(String[] args) throws Exception{ | |
try { | |
File file = new File(coverSheetPath); | |
FileInputStream fis = new FileInputStream(file); | |
ByteReader byteReader = new InputStreamByteReader(fis); | |
/* | |
* A PDF Portfolio or "Portable Collection" consists of a base PDF | |
* file or "Coversheet" with attachments and optionally other | |
* dictionaries that describe how to display those attachments so we | |
* need to start with a new or existing PDF file. | |
*/ | |
coverSheet = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance()); | |
/* | |
* Then we need to add the "collection dictionary" so we can add | |
* attachments and associate metadata with them. | |
*/ | |
pdfPortableCollection = PDFPortableCollection.newInstance(coverSheet); | |
pdfPortableCollection.setRootFolder("Root"); | |
coverSheet.requireCatalog().setCollection(pdfPortableCollection); | |
/* | |
* Then we add a "schema" to the collection. We want to sort the | |
* files based on their filename so we set that up here. | |
*/ | |
pdfCollectionSchema = PDFCollectionSchema.newInstance(coverSheet); | |
PDFCollectionField field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.file, "Name"); | |
field.setOrder(0); | |
pdfCollectionSchema.set(ASName.create("FileName"), field); | |
PDFCollectionSort sort = PDFCollectionSort.newInstance(coverSheet, new ASName[]{ASName.create("FileName"), ASName.create("Datalogics:Title")}); | |
pdfPortableCollection.setSort(sort); | |
/* | |
* Now we can begin adding files. If the input is a .zip file, we | |
* expand that into a temporary folder first. Otherwise we use the | |
* folder full of files as input | |
*/ | |
String attachmentsFolder; | |
if (inputFiles.endsWith(".zip")) { | |
unZip(inputFiles); | |
attachmentsFolder = outputDir+"/temp"; | |
} | |
else { | |
attachmentsFolder = inputFiles; | |
} | |
// Add the files and folders by recursing the root folder | |
addFilesAndFolders(attachmentsFolder, pdfPortableCollection.getRootFolder()); | |
//Set the schema for the collection | |
pdfPortableCollection.setSchema(pdfCollectionSchema); | |
//collectionDict.setInitialDocumentName(new ASString(new File(pdfFile1).getAbsolutePath())); | |
coverSheet.requireCatalog().setCollection(pdfPortableCollection); | |
// | |
// ------------ Project complete. Save the new collection --------- | |
ByteWriter outputWriter = SampleFileServices.getRAFByteWriter( | |
outputDir + "NewCollection.pdf"); | |
PDFSaveFullOptions fullOptions = PDFSaveFullOptions.newInstance(PDFVersion.v1_7); | |
coverSheet.save(outputWriter, fullOptions); | |
} finally { | |
coverSheet.close(); | |
} | |
System.out.println("Done!"); | |
} | |
/** | |
* Decompresses a zip file to a temporary folder in the output directory | |
* | |
* @param zipFile the path to the .zip file to be decompressed | |
* @throws ZipException | |
* @throws IOException | |
*/ | |
@SuppressWarnings("resource") | |
static public void unZip(String zipFile) throws ZipException, IOException | |
{ | |
int BUFFER = 2048; | |
File file = new File(zipFile); | |
ZipFile zip = new ZipFile(file); | |
String newPath = zipFile.substring(0, zipFile.length() - 4); | |
new File(newPath).mkdir(); | |
Enumeration<? extends ZipEntry> zipFileEntries = zip.entries(); | |
while (zipFileEntries.hasMoreElements()) | |
{ | |
ZipEntry entry = (ZipEntry) zipFileEntries.nextElement(); | |
String currentEntry = entry.getName(); | |
File destFile = new File(outputDir+"/temp", currentEntry); | |
File destinationParent = destFile.getParentFile(); | |
destinationParent.mkdirs(); | |
if (!entry.isDirectory()) | |
{ | |
BufferedInputStream is = new BufferedInputStream(zip | |
.getInputStream(entry)); | |
int currentByte; | |
byte data[] = new byte[BUFFER]; | |
FileOutputStream fos = new FileOutputStream(destFile); | |
BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER); | |
while ((currentByte = is.read(data, 0, BUFFER)) != -1) { | |
dest.write(data, 0, currentByte); | |
} | |
dest.flush(); | |
dest.close(); | |
is.close(); | |
} | |
if (currentEntry.endsWith(".zip")) | |
{ | |
unZip(destFile.getAbsolutePath()); | |
} | |
} | |
} | |
/** | |
* Recursively look for files and folders from the initial input folder path to add to the PDF Collection | |
* | |
* @param path The path of the file to add | |
* @param currentFolder The collection object to add the path to | |
* @throws Exception | |
*/ | |
public static void addFilesAndFolders(String path, PDFCollectionFolder currentFolder) throws Exception { | |
File root = new File(path); | |
File[] list = root.listFiles(); | |
if (list == null) return; | |
for (File f : list) { | |
if (f.isDirectory()) { | |
// If the File is a folder, create the folder within it's parent | |
PDFCollectionFolder newFolder = PDFCollectionFolder.newInstance(currentFolder, f.getName()); | |
// Create the Collection Item and set it's key/value pairs so that a folder name appears in the Acrobat UI | |
PDFCollectionItem pdfCollectionItem = PDFCollectionItem.newInstance(coverSheet); | |
PDFCollectionItemData titleData = PDFCollectionItemData.newInstance(coverSheet, f.getName(), ASName.create("Datalogics:Title")); | |
pdfCollectionItem.setData(ASName.create("Datalogics:Title"), titleData); | |
newFolder.setCollectionItem(pdfCollectionItem); | |
// Recourse | |
addFilesAndFolders(f.getAbsolutePath(), newFolder ); | |
} | |
else { | |
attachFile(f.getAbsolutePath(), currentFolder); | |
} | |
} | |
} | |
/** | |
* Adds a file to the collection and the file metadata to the schema | |
* | |
* @param path The path to the file to add | |
* @param currentFolder The folder in the collection that the file is added to | |
* @throws Exception | |
*/ | |
private static void attachFile(String path, PDFCollectionFolder currentFolder) throws Exception | |
{ | |
File attFile = new File(path); | |
ByteReader attByteReader = new LazyRandomAccessFileByteReader(attFile); | |
PDFFileSpecification pdfFileSpecification; | |
try { | |
// First, follow the standard setup for embedded files by creating the appropriate data. | |
ASDate modDate = new ASDate(new Date(attFile.lastModified())); | |
PDFEmbeddedFile fileToEmbedd = PDFEmbeddedFile.newInstance(coverSheet, | |
PDFEmbeddedFileInfo.newInstance(coverSheet, | |
(int)attByteReader.length(), | |
modDate, | |
modDate), | |
attByteReader); | |
fileToEmbedd.setFilter(PDFFilterFlate.newInstance(coverSheet, null)); | |
pdfFileSpecification = PDFFileSpecification.newInstance(coverSheet, Paths.get(attFile.getName()).getFileName().toString().getBytes(), fileToEmbedd); | |
PDFCollectionUtil.addFileToPDFCollectionFolder(Paths.get(attFile.getName()).getFileName().toString(), pdfFileSpecification, currentFolder); | |
// Get metadata information from PDF and add it to the schema so | |
// that the Portfolio can display it. If the file isn't a PDF, just | |
// add a title field based on the filename. | |
PDFCollectionItem pdfCollectionItem = PDFCollectionItem.newInstance(coverSheet); | |
if (path.endsWith(".pdf")) { | |
File file = new File(path); | |
FileInputStream fis = new FileInputStream(file); | |
ByteReader byteReader = new InputStreamByteReader(fis); | |
PDFDocument attachment = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance()); | |
// The DocumentInfo ditionary holds the PDF metadata | |
CosDictionary infoCosDict = attachment.getDocumentInfo().getCosDictionary(); | |
Iterator<ASName> iterator = infoCosDict.keyIterator(); | |
while (iterator.hasNext()){ | |
ASName keyName = iterator.next(); | |
if (infoCosDict.get(keyName).getType() == CosObject.t_String) { | |
//Different Portfolio layouts can add fields to the schema so we differentiate ours by prefixing the field name with "Datalogics:" | |
ASName key = ASName.create("Datalogics:"+keyName.asString()); | |
String value = infoCosDict.get(keyName).textValue(); | |
PDFCollectionItemData pdfCollectionItemData = PDFCollectionItemData.newInstance(coverSheet, value, key); | |
pdfCollectionItem.setData(key, pdfCollectionItemData); | |
if (!pdfCollectionSchema.containsKey(keyName)) { | |
PDFCollectionField field = null; | |
//So that the date information appears correctly, we need to tell the schema to display the string value as a date. | |
if (keyName.toString().contains("Date")) { | |
field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.date, keyName.asString()); | |
} | |
else { | |
field = PDFCollectionField.newInstance(coverSheet, PDFCollectionFieldType.text, keyName.asString()); | |
} | |
pdfCollectionSchema.set(key, field); | |
} | |
} | |
} | |
pdfFileSpecification.setCollectionItem(pdfCollectionItem); | |
} | |
else { | |
PDFCollectionItemData titleData = PDFCollectionItemData.newInstance(coverSheet, attFile.getName(), ASName.create("Datalogics:Title")); | |
pdfCollectionItem.setData(ASName.create("Datalogics:Title"), titleData); | |
pdfFileSpecification.setCollectionItem(pdfCollectionItem); | |
} | |
System.out.println("Added: " + attFile.getName()); | |
} catch (Exception e) { | |
attByteReader.close(); | |
throw e; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment