Last active
November 3, 2023 15:27
-
-
Save Romern/2dd6fe3be58cf7e71f7f87dee616ee6a to your computer and use it in GitHub Desktop.
PostScript function to embed files in a PDF easily using pdfmark (by default it dumps /tmp/*), e.g. ```gs -sDEVICE=pdfwrite -o foo.pdf embedfile.ps```. The Python script extracts the files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%!PS | |
%%%%%%%%%%%%%%%%%%%Helper Functions | |
% (a) (b) -> (ab) | |
% (a) (b)  concatstrings  (ab)
% Returns a newly allocated string holding the first operand followed by
% the second; neither operand is modified.
/concatstrings {
    1 index length 1 index length add string    % a b new
    dup 0 4 index putinterval                   % a b new   (new starts with a)
    dup 4 -1 roll length                        % b new new len(a)
    4 -1 roll putinterval                       % new       (b written after a)
} bind def
%https://comp.lang.postscript.narkive.com/lb2y58U5/string-replace-in-postscript | |
% comp.lang.postscript FAQ 7.8: | |
% string1 string2 *append* string | |
% Function: Concatenates two strings together. | |
% string1 string2  append  string
% Concatenates two strings into a newly allocated string.
/append {
    exch                             % s2 s1
    2 copy length exch length add    % s2 s1 total
    string                           % s2 s1 new
    dup 3 1 roll                     % s2 new s1 new
    copy length                      % s2 new len(s1)  (copy returns the filled prefix)
    3 -1 roll                        % new len(s1) s2
    2 index 3 1 roll                 % new new len(s1) s2
    putinterval                      % new             (s2 written after s1)
} bind def
% replace string find *findandreplaceall* string' | |
% replace string find  findandreplaceall  string'
% Returns a new string in which every occurrence of <find> inside <string>
% has been substituted by <replace>.
/findandreplaceall {
    4 dict begin                     % scratch dictionary for the named locals
    /needle exch def
    /haystack exch def
    /replacement exch def
    /result () def                   % accumulates the output, initially empty
    {
        haystack needle search {
            % Stack: post match pre -- keep pre, substitute the match.
            result exch append replacement append /result exch def
            pop                      % drop the matched text
            /haystack exch def       % continue searching in the remainder
        } {
            % No further match: the rest is copied through unchanged.
            result exch append /result exch def
            exit
        } ifelse
    } loop
    result
    end
} bind def
%%%%%%%%%%%%%%%%%%%Actual code | |
% Embeds the file located at parameter 1 into the PDF | |
% https://ghostscript.com/blog/zugferd.html | |
% filename  EmbedFile  -
% Embeds the file located at <filename> into the PDF being written.
/EmbedFile {
    /inputFileName exch def

    % pdfmark does not support dynamically generated object names, so the
    % pdfmark code is kept as a string template: every {InvoiceStream}
    % placeholder is replaced with a per-file name and the resulting text
    % is then executed as PostScript.

    % Replacement text: {<filename>Stream}
    ({) inputFileName concatstrings (Stream}) concatstrings

    % Template (executed later, so inputFileName is looked up at run time).
    (
[ /_objdef {InvoiceStream} /type /stream /OBJ pdfmark
[ {InvoiceStream} << /Type /EmbeddedFile /Subtype (application/octet-stream) cvn >> /PUT pdfmark
[ {InvoiceStream} inputFileName (r) file /PUT pdfmark
[ {InvoiceStream} /CLOSE pdfmark
[ /Name inputFileName /FS <<
/Type /FileSpec
/F inputFileName
/AFRelationship /Alternative
/EF << /F {InvoiceStream} >>
>> /EMBED pdfmark
)

    % Placeholder to search for.
    ({InvoiceStream})

    findandreplaceall
    cvx exec
} def
% Embed every file matching /tmp/*. Each call runs under `stopped` and the
% resulting boolean is discarded, so an error while embedding one file does
% not abort the enumeration of the remaining files.
(/tmp/*) {
    { EmbedFile } stopped pop
} 4096 string filenameforall
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# very slightly modified version of https://gist.github.com/kevinl95/29a9e18d474eb6e23372074deff2df38 with cmd arguments and by default no output | |
import PyPDF2 | |
import sys | |
from pathlib import Path | |
def getAttachments(reader):
    """
    Retrieves the file attachments of the PDF as a dictionary of file names
    and the file data as a bytestring.

    The whole ``/EmbeddedFiles`` name tree of the document catalog is walked,
    so attachments are found whether their names sit directly on the root
    node or are spread over any number of (nested) ``/Kids`` nodes.

    :param reader: PDF reader exposing the document trailer as
        ``reader.trailer``
    :return: dictionary of filenames and bytestrings; empty when the PDF
        has no embedded files
    """
    catalog = reader.trailer["/Root"]
    if "/Names" not in catalog or "/EmbeddedFiles" not in catalog["/Names"]:
        return {}

    attachments = {}
    # Depth-first walk of the name tree. Kids are pushed in reverse so that
    # nodes are visited in the order they appear in the document.
    pending = [catalog["/Names"]["/EmbeddedFiles"]]
    while pending:
        node = pending.pop()
        if "/Names" in node:
            entries = node["/Names"]
            # A /Names array is flat: [name1, spec1, name2, spec2, ...].
            # Pairing by position avoids the quadratic index() lookup and
            # picks the right value even when a name occurs more than once.
            for name, spec in zip(entries[0::2], entries[1::2]):
                if isinstance(name, str):
                    fDict = spec.getObject()
                    attachments[name] = fDict['/EF']['/F'].getData()
        if "/Kids" in node:
            pending.extend(kid.getObject() for kid in reversed(node["/Kids"]))
    return attachments
# Read the PDF named by the first command-line argument. The attachments are
# pulled out while the file is open; the ``with`` block then closes the
# handle, which the original code left open.
with open(sys.argv[1], 'rb') as handler:
    reader = PyPDF2.PdfFileReader(handler)
    dictionary = getAttachments(reader)
# print(dictionary)
for fName, fData in dictionary.items():
    # Recreate the attachment's resolved absolute path underneath the current
    # working directory by prefixing it with "." before joining.
    path = Path.cwd() / ("." + str(Path(fName).resolve()))
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as outfile:
        outfile.write(fData)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment