Created
June 7, 2016 15:18
-
-
Save addam/1ee1085f2d450dd737f9ca63bb83a2fd to your computer and use it in GitHub Desktop.
minimalistic script that creates a PDF file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_dict(obj, refs=tuple()):
    """Serialize a mapping as PDF dictionary syntax.

    Each entry is rendered as ``/key value`` followed by a newline, and the
    whole sequence is wrapped in ``<< ... >>``.  Values are rendered by
    ``format_value``, which receives the same *refs* sequence.
    """
    entries = []
    for key, value in obj.items():
        rendered = format_value(value, refs)
        entries.append("/{} {}\n".format(key, rendered))
    return "<< " + "".join(entries) + ">>"
def _format_real(number):
    """Render a float without exponent notation, which PDF syntax lacks."""
    text = repr(float(number))
    if "e" not in text:
        return text
    # repr() switched to scientific notation (very large or very small value);
    # fall back to fixed-point and drop the padding zeros.
    return "{:.10f}".format(number).rstrip("0").rstrip(".")


def format_value(value, refs=tuple()):
    """Serialize a single Python value as PDF syntax.

    *refs* is the sequence of top-level objects of the document.  A value
    that is found in *refs* (compared with ``==``, as ``in`` and ``index``
    do) is written as an indirect reference ``N 0 R`` where N is its
    1-based position in *refs*.  Otherwise:

    * ``None``           -> ``null``
    * ``bool``           -> ``true`` / ``false``
    * ``dict``           -> a nested dictionary via ``format_dict``
    * ``list``/``tuple`` -> an array ``[ a b c ]``
    * ``int``/``float``  -> a number (floats never use exponent notation)
    * anything else      -> a PDF name ``/value``; this script cannot
      output PDF strings
    """
    if value in refs:
        return "{} 0 R".format(refs.index(value) + 1)
    if value is None:
        return "null"
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, dict):
        return format_dict(value, refs)
    if isinstance(value, (list, tuple)):
        return "[ " + " ".join(format_value(item, refs) for item in value) + " ]"
    if isinstance(value, int):
        return str(int(value))
    if isinstance(value, float):
        return _format_real(value)
    return "/{}".format(value)
def write_object(index, obj, refs, f, stream=None):
    """Write one indirect object ``index 0 obj ... endobj`` to *f*.

    *obj* is either a dict (the object's dictionary, optionally carrying
    its stream data under the key ``"stream"``) or any non-dict value,
    which is then taken to be the stream data of an object with an
    otherwise empty dictionary.  When stream data is present, a ``Length``
    entry is added to the written dictionary.

    The caller's dict is left untouched: the ``"stream"`` key is removed
    and ``Length`` is added on a shallow copy, so the same object can be
    written more than once.

    Returns the value of ``f.write`` for the whole object, i.e. the number
    of characters written.  This equals the byte count only when every
    character is encoded as one byte and newlines are not translated.
    ``Length`` is likewise ``len(stream)`` in characters.
    """
    if isinstance(obj, dict):
        header = dict(obj)
        if "stream" in header:
            stream = header.pop("stream")
    else:
        stream, header = obj, {}

    # An empty stream is still a stream; only None means "no stream".
    has_stream = stream is not None
    if has_stream:
        header["Length"] = len(stream)

    parts = ["{} 0 obj\n".format(index), format_dict(header, refs)]
    if has_stream:
        parts.extend(("\nstream\n", stream, "\nendstream"))
    parts.append("\nendobj\n")
    return f.write("".join(parts))
def write(*data, file):
    """Write a complete PDF document made of the objects *data* to *file*.

    The objects are numbered 1..len(data) in the order given; any value
    inside them that is found in *data* is written as an indirect
    reference to the corresponding object.  One of the objects must be a
    dict whose ``"Type"`` is ``"Catalog"``; it becomes the trailer's
    ``Root``.

    *file* must be a text-mode writable object whose ``write`` returns the
    number of characters written.  Offsets in the cross-reference table are
    accumulated from those return values, so they are correct byte offsets
    only if each character is written as exactly one byte.

    Raises ValueError (before anything is written) if *data* contains no
    catalog.
    """
    # Non-dict entries (raw stream strings) are skipped explicitly: indexing
    # a str with "Type" would raise TypeError.
    catalog = next(
        (ob for ob in data if isinstance(ob, dict) and ob.get("Type") == "Catalog"),
        None,
    )
    if catalog is None:
        raise ValueError("no object with Type 'Catalog' among the objects to write")

    offsets = []
    position = file.write("%PDF-1.4\n")
    for index, obj in enumerate(data, 1):
        offsets.append(position)
        position += write_object(index, obj, data, file)
    xref_pos = position

    # The table has one entry per object plus the mandatory free entry for
    # object 0, whose generation number is 65535.
    entry_count = len(offsets) + 1
    file.write("xref\n0 {}\n".format(entry_count))
    # Each entry must be exactly 20 bytes, hence the space before the
    # single-character line ending.
    file.write("{:010} {:05} f \n".format(0, 65535))
    for offset in offsets:
        file.write("{:010} {:05} n \n".format(offset, 0))

    file.write("trailer\n")
    file.write(format_dict({"Size": entry_count, "Root": catalog}, data))
    file.write("\nstartxref\n{}\n%%EOF\n".format(xref_pos))
def encode(data):
    """Compress *data* with zlib and return it ASCII85-encoded as a str.

    Objects that have an ``encode`` method (such as ``str``) are first
    converted by calling it without arguments.  The result is wrapped at
    250 columns, keeps the trailing ``~>`` end marker and has the leading
    ``<~`` marker removed.
    """
    import base64
    import zlib

    raw = data.encode() if hasattr(data, "encode") else data
    deflated = zlib.compress(raw)
    framed = base64.a85encode(deflated, adobe=True, wrapcol=250)
    # Drop the two-character "<~" prefix that adobe=True adds.
    return framed[2:].decode()
# Page tree root; MediaBox is A4 in points.
root = {"Type": "Pages", "MediaBox": [0, 0, 595, 842], "Kids": list()}
catalog = {"Type": "Catalog", "Pages": root}
font = {"Type": "Font", "Subtype": "Type1", "Name": "Font1", "BaseFont": "Helvetica", "Encoding": "MacRomanEncoding"}

# All graphic commands use postfix notation (operands first, operator last)
# and have a fixed number of parameters.
content_streams = [
    # BT..ET: begin..end text; /F1 references the font resource
    "BT /F1 10 Tf 1 1 Td (Hello world) Tj ET",
    # m, l, S: move, line, stroke; w: line width; d: dash array
    "15 25 m 15 35 l S 4 w [ 4 6 ] 0 d 15 25 m 40 25 l S [ ] 0 d 1 w",
    # cm: concatenate a transformation matrix; Do: draw object.
    # Images are always 1x1, so the matrix scales this one up.
    "100 0 0 100 0 0 cm /Im1 Do",
]

# A 2x3 pixel, 8-bit grayscale image; its data is deflated and ASCII85-encoded.
image = {
    "Type": "XObject",
    "Subtype": "Image",
    "Width": 2,
    "Height": 3,
    "ColorSpace": "DeviceGray",
    "BitsPerComponent": 8,
    "Interpolate": False,
    "Filter": ["ASCII85Decode", "FlateDecode"],
    "stream": encode(b"\x22\x44\x00\x88\xff\xcc"),
}

# One page per content stream.
for content in content_streams:
    # Im1 is listed as a resource only on the page whose content uses "Do".
    resources = {"Font": {"F1": font}, "XObject": {"Im1": image} if "Do" in content else dict()}
    page = {"Type": "Page", "Parent": root, "Contents": content, "Resources": resources}
    root["Kids"].append(page)
root["Count"] = len(root["Kids"])

# newline="\n" disables platform newline translation and latin-1 writes one
# byte per character, so the character counts used for the xref offsets and
# stream lengths match the bytes on disk.
with open("out.pdf", "w", newline="\n", encoding="latin-1") as f:
    write(catalog, root, font, *root["Kids"], *content_streams, image, file=f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment