Created
March 27, 2014 10:11
-
-
Save evanbrumley/9804328 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
A bunch of PDF utilities for Django, wkhtmltopdf and pdftk
Note: These have been cobbled together from various projects,
and probably can't be trusted. Make sure you read over
and fully understand the code before attempting to use!
"""
import sys, os, csv | |
import subprocess | |
import datetime | |
from tempfile import NamedTemporaryFile | |
from pipes import quote | |
from django.conf import settings | |
from django.template import RequestContext | |
from django.template.loader import render_to_string | |
from django.template import loader | |
from django.contrib.sites.models import Site | |
from django.core.files.base import ContentFile | |
from django.http import HttpResponse | |
def get_file_location_from_template_name(template_name):
    """
    Returns the path of the first file called template_name found in one of
    the directories listed in settings.TEMPLATE_DIRS, or None if no
    directory contains it.
    """
    candidates = (
        os.path.join(directory, template_name)
        for directory in settings.TEMPLATE_DIRS
    )
    # The generator is lazy, so directories after the first match are
    # never touched on disk.
    return next((path for path in candidates if os.path.exists(path)), None)
def render_pdf(file_contents, file_name='output.pdf'):
    """
    Wraps file_contents in an HttpResponse with the application/pdf
    content type and returns it.

    file_name is sent in the Content-Disposition header; '.pdf' is
    appended when the name does not already end with it.
    """
    # ``content_type`` is used instead of the ``mimetype`` keyword, which
    # Django deprecated in 1.5 and removed in 1.7.
    response = HttpResponse(content_type='application/pdf')
    if not file_name.endswith('.pdf'):
        file_name = file_name + '.pdf'
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)
    return response
def render_png(file_contents, file_name='output.png'):
    """
    Wraps file_contents in an HttpResponse with the image/png content
    type and returns it.

    file_name is sent in the Content-Disposition header; '.png' is
    appended when the name does not already end with it.
    """
    # ``content_type`` is used instead of the ``mimetype`` keyword, which
    # Django deprecated in 1.5 and removed in 1.7.
    response = HttpResponse(content_type='image/png')
    if not file_name.endswith('.png'):
        file_name = file_name + '.png'
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)
    return response
def process_value_for_fdf(val):
    """
    Due to a weird bug in PDFTK 1.44, we can't handle parens
    in pdf forms. For the moment, we just replace them with
    square brackets.

    Falsy values (None, '', 0, False) and values that are not text
    (e.g. True for a checkbox, or a number) are returned unchanged.
    """
    if not val:
        return val
    # Only text has parens to replace. ``type(u'')`` is ``unicode`` on
    # Python 2 and ``str`` on Python 3, so both text types are covered.
    # Previously a truthy non-string such as True or 42 raised
    # AttributeError on ``.replace``.
    if not isinstance(val, (str, type(u''))):
        return val
    return val.replace("(", "[").replace(")", "]")
def create_pdf_from_pdf(template_name, context, flatten=True):
    """
    Given a PDF template and context, returns the contents of the filled-in PDF.
    The PDF should contain a form, and the context should refer to fields in that form.
    Prerequisites: fdfgen (available on PyPI) and pdftk (http://www.pdflabs.com/docs/install-pdftk/)

    template_name is resolved with get_file_location_from_template_name.
    context maps form field names to values; each value is passed through
    process_value_for_fdf before being written to a temporary FDF file.
    When flatten is true, 'flatten' is added to the pdftk command.

    Returns whatever pdftk wrote to stdout (empty if pdftk failed).
    Raises Exception if the template cannot be found, and OSError if the
    pdftk executable cannot be started.
    """
    from fdfgen import forge_fdf
    template_location = get_file_location_from_template_name(template_name)
    if not template_location:
        raise Exception("PDF Template %s does not exist!" % template_name)
    # .items() rather than .iteritems() so this works on Python 2 and 3.
    fields = [(key, process_value_for_fdf(val)) for key, val in context.items()]
    fdf = forge_fdf("", fields, [], [], [])
    fdf_file = NamedTemporaryFile(delete=False, suffix='.fdf')
    try:
        fdf_file.write(fdf)
        fdf_file.close()
        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk', template_location, 'fill_form', fdf_file.name,
                   'output', '-']
        if flatten:
            command.append('flatten')
        popen = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together; reading only
        # stdout can deadlock once the stderr pipe fills up.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary FDF, even if pdftk could not be run.
        fdf_file.close()
        os.unlink(fdf_file.name)
    return pdf_contents
def create_pdf_from_html(template_name, context, toc=False, include_footer=False,
                         margin_bottom=0, margin_top=0, margin_left=0,
                         margin_right=0, javascript_delay=0, zoom=1.0, page_size='A4'):
    """
    Given a template and context, renders the template to HTML and returns
    the contents of the PDF that wkhtmltopdf produces from it.
    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)

    Side effect: context['base_url'] is set to a file:// URL pointing at
    settings.STATIC_ROOT before rendering. In the rendered HTML,
    src attributes starting with settings.MEDIA_URL are rewritten to
    file:// paths under settings.MEDIA_ROOT.

    The margin and zoom options are only passed to wkhtmltopdf when they
    are numbers, and page_size only when it is truthy. toc adds a table
    of contents object. include_footer adds a page counter on the left
    and a generation date on the right.

    Note: wkhtmltopdf 0.12.6 reportedly refuses to load local files unless
    --enable-local-file-access or --allow is given; neither is passed here.

    Returns whatever wkhtmltopdf wrote to stdout (empty if it failed).
    Raises OSError if the wkhtmltopdf executable cannot be started.
    """
    def is_number(value):
        # bool is a subclass of int but is never a meaningful measurement.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)
    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)
    # Non-ASCII characters become numeric character references so the file
    # content is independent of wkhtmltopdf's encoding detection.
    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # Built as an argument list (no shell), so no value needs quoting and
    # none can be interpreted by a shell.
    command = ['wkhtmltopdf']
    margins = (
        ('--margin-bottom', margin_bottom),
        ('--margin-top', margin_top),
        ('--margin-left', margin_left),
        ('--margin-right', margin_right),
    )
    for flag, value in margins:
        if is_number(value):
            command += [flag, '%s' % value]
    command += ['--javascript-delay', '%s' % javascript_delay]
    if page_size:
        command += ['--page-size', '%s' % page_size]
    if is_number(zoom):
        command += ['--zoom', '%s' % zoom]
    if include_footer:
        left_footer = "Page [page] of [toPage]"
        right_footer = "PDF Generated by on %s" % (
            datetime.datetime.today().strftime("%b %d %Y"),
        )
        # Remove square brackets so wkhtml doesn't get confuzzled
        right_footer = right_footer.replace('[', '').replace(']', '')
        command += ['--footer-left', left_footer,
                    '--footer-right', right_footer]
    if toc:
        command.append('toc')

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()
        command += [html_file.name, '-']
        popen = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together; reading only
        # stdout can deadlock once the stderr pipe fills up.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even on failure.
        html_file.close()
        os.unlink(html_file.name)
    return pdf_contents
def create_png_from_html(template_name, context, width=None, height=None, crop_h=None, crop_w=None, crop_x=None, crop_y=None, javascript_delay=0):
    """
    Given a template and context, renders the template to HTML and returns
    the contents of the PNG that wkhtmltoimage produces from it.
    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)

    Side effect: context['base_url'] is set to a file:// URL pointing at
    settings.STATIC_ROOT before rendering. In the rendered HTML,
    src attributes starting with settings.MEDIA_URL are rewritten to
    file:// paths under settings.MEDIA_ROOT.

    width, height and the crop_* options are only passed to wkhtmltoimage
    when they are numbers.

    Returns whatever wkhtmltoimage wrote to stdout (empty if it failed).
    Raises OSError if the wkhtmltoimage executable cannot be started.
    """
    def is_number(value):
        # bool is a subclass of int but is never a meaningful measurement.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)
    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)
    # Non-ASCII characters become numeric character references so the file
    # content is independent of wkhtmltoimage's encoding detection.
    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # Built as an argument list (no shell), so no value needs quoting and
    # none can be interpreted by a shell.
    command = ['wkhtmltoimage', '--format', 'png',
               '--javascript-delay', '%s' % javascript_delay]
    optional_numbers = (
        ('--crop-h', crop_h),
        ('--crop-w', crop_w),
        ('--crop-x', crop_x),
        ('--crop-y', crop_y),
        ('--width', width),
        ('--height', height),
    )
    for flag, value in optional_numbers:
        if is_number(value):
            command += [flag, '%s' % value]

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()
        command += [html_file.name, '-']
        popen = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together; reading only
        # stdout can deadlock once the stderr pipe fills up.
        image_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even on failure.
        html_file.close()
        os.unlink(html_file.name)
    return image_contents
def overlay_pdfs(overlay, background, overlay_is_template=False, background_is_template=False):
    """
    Overlays one pdf on top of another.
    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)

    overlay and background are each either raw PDF contents (the default)
    or, when the matching *_is_template flag is true, a template name that
    is resolved with get_file_location_from_template_name. Raw contents
    are written to temporary files, which are removed before returning.

    Returns whatever pdftk wrote to stdout (empty if pdftk failed).
    Raises Exception if a template cannot be found, and OSError if the
    pdftk executable cannot be started.
    """
    temporary_file_names = []

    def resolve(pdf, is_template):
        # Turn a template name or raw PDF contents into a path on disk.
        if is_template:
            location = get_file_location_from_template_name(pdf)
            if not location:
                raise Exception("PDF Template %s does not exist!" % pdf)
            return location
        pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
        # Registered before writing so it is cleaned up even if the
        # write fails.
        temporary_file_names.append(pdf_file.name)
        try:
            pdf_file.write(pdf)
        finally:
            pdf_file.close()
        return pdf_file.name

    try:
        overlay_file_name = resolve(overlay, overlay_is_template)
        background_file_name = resolve(background, background_is_template)
        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk', overlay_file_name, 'multibackground',
                   background_file_name, 'output', '-']
        popen = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together; reading only
        # stdout can deadlock once the stderr pipe fills up.
        pdf_contents, _ = popen.communicate()
    finally:
        # Covers every exit path, including a missing background template
        # after the overlay temp file was already created.
        for temporary_file_name in temporary_file_names:
            os.unlink(temporary_file_name)
    return pdf_contents
def join_pdfs(*pdfs):
    """
    Joins multiple pdf files
    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)

    Each argument may be one of:
    - an object with a ``name`` attribute, taken as a path relative to
      settings.MEDIA_ROOT;
    - a text string ending in '.pdf', taken as a template name and
      resolved with get_file_location_from_template_name;
    - anything else, taken as raw PDF contents and written to a temporary
      file that is removed before returning.

    Returns whatever pdftk wrote to stdout (empty if pdftk failed).
    Raises Exception if a template cannot be found, and OSError if the
    pdftk executable cannot be started.
    """
    file_names = []
    temporary_file_names = []
    try:
        for pdf in pdfs:
            if hasattr(pdf, 'name'):
                file_names.append(settings.MEDIA_ROOT + '/' + pdf.name)
            # Only text can be a template name; checking the type first
            # stops Python 3 ``bytes`` contents from raising TypeError in
            # ``endswith('.pdf')``.
            elif isinstance(pdf, (str, type(u''))) and pdf.endswith('.pdf'):
                location = get_file_location_from_template_name(pdf)
                if not location:
                    # Previously a None entry reached the command and
                    # failed with an unrelated TypeError.
                    raise Exception("PDF Template %s does not exist!" % pdf)
                file_names.append(location)
            else:
                pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
                # Registered before writing so it is cleaned up even if
                # the write fails.
                temporary_file_names.append(pdf_file.name)
                try:
                    pdf_file.write(pdf)
                finally:
                    pdf_file.close()
                file_names.append(pdf_file.name)
        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk'] + file_names + ['cat', 'output', '-']
        popen = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together; reading only
        # stdout can deadlock once the stderr pipe fills up.
        pdf_contents, _ = popen.communicate()
    finally:
        for temporary_file_name in temporary_file_names:
            os.unlink(temporary_file_name)
    return pdf_contents
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Breaking change with wkhtmltopdf 0.12.6
If images from the filesystem are embedded then the updated version of wkhtmltopdf will cause an error unless either the
--enable-local-file-access flag is passed in or --allow {path} is used.