Skip to content

Instantly share code, notes, and snippets.

@evanbrumley
Created March 27, 2014 10:11
Show Gist options
  • Save evanbrumley/9804328 to your computer and use it in GitHub Desktop.
Save evanbrumley/9804328 to your computer and use it in GitHub Desktop.
"""
A bunch of PDF utilities for Django, wkhtmltopdf and pdftk
Note: These have been cobbled together from various projects,
and probably can't be trusted. Make sure you read over
and fully understand the code before attempting to use it!
"""
import sys, os, csv
import subprocess
import datetime
from tempfile import NamedTemporaryFile
from pipes import quote
from django.conf import settings
from django.template import RequestContext
from django.template.loader import render_to_string
from django.template import loader
from django.contrib.sites.models import Site
from django.core.files.base import ContentFile
from django.http import HttpResponse
def get_file_location_from_template_name(template_name):
    """
    Returns the path of the first existing file called template_name
    inside the configured template directories, or None when no
    directory contains it.
    Directories are taken from settings.TEMPLATE_DIRS (older Django) and
    then from the 'DIRS' entry of every engine in settings.TEMPLATES
    (newer Django). A missing setting is treated as an empty list
    instead of raising AttributeError.
    """
    template_dirs = list(getattr(settings, 'TEMPLATE_DIRS', None) or [])
    for engine in getattr(settings, 'TEMPLATES', None) or []:
        template_dirs.extend(engine.get('DIRS') or [])

    for template_dir in template_dirs:
        location = os.path.join(template_dir, template_name)
        if os.path.exists(location):
            return location
    return None
def render_pdf(file_contents, file_name='output.pdf'):
    """
    Wraps raw PDF data in an HttpResponse served as application/pdf.
    file_name gets a '.pdf' suffix if it does not already end with one
    and is sent in the Content-Disposition header.
    """
    if not file_name.endswith('.pdf'):
        file_name = file_name + '.pdf'
    # 'content_type' replaces the 'mimetype' keyword, which newer Django
    # versions no longer accept.
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)
    return response
def render_png(file_contents, file_name='output.png'):
    """
    Wraps raw PNG data in an HttpResponse served as image/png.
    file_name gets a '.png' suffix if it does not already end with one
    and is sent in the Content-Disposition header.
    """
    if not file_name.endswith('.png'):
        file_name = file_name + '.png'
    # 'content_type' replaces the 'mimetype' keyword, which newer Django
    # versions no longer accept.
    response = HttpResponse(content_type='image/png')
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)
    return response
def process_value_for_fdf(val):
    """
    Due to a weird bug in PDFTK 1.44, we can't handle parens
    in pdf forms. For the moment, we just replace them with
    square brackets.
    Falsy values and values that are not text (booleans, numbers, ...)
    are returned unchanged.
    """
    if not val:
        return val
    try:
        return val.replace("(", "[").replace(")", "]")
    except (AttributeError, TypeError):
        # Not a text value, so there are no parens to rewrite.
        return val
def create_pdf_from_pdf(template_name, context, flatten=True):
    """
    Given a PDF template and context, returns the contents of the filled-in PDF.
    The PDF should contain a form, and the context should refer to fields in that form.
    If flatten is true, pdftk is asked to flatten the form in the output.
    Raises Exception if the template cannot be found, and OSError if
    the pdftk executable cannot be started.
    Prerequisites: fdfgen (available on PyPI) and pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    from fdfgen import forge_fdf

    template_location = get_file_location_from_template_name(template_name)
    if not template_location:
        raise Exception("PDF Template %s does not exist!" % template_name)

    # items() works on both Python 2 and Python 3 dictionaries
    fields = [(key, process_value_for_fdf(val)) for key, val in context.items()]
    fdf = forge_fdf("", fields, [], [], [])

    fdf_file = NamedTemporaryFile(delete=False, suffix='.fdf')
    try:
        fdf_file.write(fdf)
        fdf_file.close()

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk', template_location, 'fill_form', fdf_file.name, 'output', '-']
        if flatten:
            command.append('flatten')

        popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a chatty
        # stderr cannot block the child while we wait on stdout.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary FDF file, even on failure
        fdf_file.close()
        os.unlink(fdf_file.name)

    return pdf_contents
def create_pdf_from_html(template_name, context, toc=False, include_footer=False,
                         margin_bottom=0, margin_top=0, margin_left=0,
                         margin_right=0, javascript_delay=0, zoom=1.0, page_size='A4',
                         enable_local_file_access=False):
    """
    Renders a Django template to HTML and returns the contents of the PDF
    that wkhtmltopdf produces from it.
    context gets a 'base_url' entry pointing at STATIC_ROOT, and image
    sources starting with MEDIA_URL are rewritten to file:// paths under
    MEDIA_ROOT. Margins and zoom are only passed on when they are
    numbers; page_size is only passed on when it is truthy.
    Set enable_local_file_access to pass --enable-local-file-access,
    which wkhtmltopdf 0.12.6 and later need in order to load local files.
    Raises OSError if the wkhtmltopdf executable cannot be started.
    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
    """
    def is_number(value):
        # bool is a subclass of int, but True/False are not valid sizes
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)
    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)
    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # The command is built as an argument list and run without a shell,
    # so no value needs quoting and none can inject shell syntax.
    command = ['wkhtmltopdf']
    if enable_local_file_access:
        command.append('--enable-local-file-access')

    margins = (
        ('--margin-bottom', margin_bottom),
        ('--margin-top', margin_top),
        ('--margin-left', margin_left),
        ('--margin-right', margin_right),
    )
    for flag, value in margins:
        if is_number(value):
            command.extend([flag, str(value)])

    command.extend(['--javascript-delay', str(javascript_delay)])

    if page_size:
        command.extend(['--page-size', str(page_size)])
    if is_number(zoom):
        command.extend(['--zoom', str(zoom)])

    if include_footer:
        left_footer = "Page [page] of [toPage]"
        right_footer = "PDF Generated by on %s" % (
            datetime.datetime.today().strftime("%b %d %Y"),
        )
        # Remove square brackets so wkhtml doesn't get confuzzled
        right_footer = right_footer.replace('[', '').replace(']', '')
        command.extend(['--footer-left', left_footer, '--footer-right', right_footer])

    if toc:
        command.append('toc')

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()

        # Input page, then '-' so the PDF is written to stdout
        command.extend([html_file.name, '-'])
        popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a chatty
        # stderr cannot block the child while we wait on stdout.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even on failure
        html_file.close()
        os.unlink(html_file.name)

    return pdf_contents
def create_png_from_html(template_name, context, width=None, height=None,
                         crop_h=None, crop_w=None, crop_x=None, crop_y=None,
                         javascript_delay=0, enable_local_file_access=False):
    """
    Renders a Django template to HTML and returns the contents of the PNG
    that wkhtmltoimage produces from it.
    context gets a 'base_url' entry pointing at STATIC_ROOT, and image
    sources starting with MEDIA_URL are rewritten to file:// paths under
    MEDIA_ROOT. Crop, width and height values are only passed on when
    they are numbers.
    Set enable_local_file_access to pass --enable-local-file-access,
    which wkhtmltopdf 0.12.6 and later need in order to load local files.
    Raises OSError if the wkhtmltoimage executable cannot be started.
    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
    """
    def is_number(value):
        # bool is a subclass of int, but True/False are not valid sizes
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)
    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)
    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # The command is built as an argument list and run without a shell,
    # so no value needs quoting and none can inject shell syntax.
    command = ['wkhtmltoimage', '--format', 'png']
    if enable_local_file_access:
        command.append('--enable-local-file-access')
    command.extend(['--javascript-delay', str(javascript_delay)])

    optional_sizes = (
        ('--crop-h', crop_h),
        ('--crop-w', crop_w),
        ('--crop-x', crop_x),
        ('--crop-y', crop_y),
        ('--width', width),
        ('--height', height),
    )
    for flag, value in optional_sizes:
        if is_number(value):
            command.extend([flag, str(value)])

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()

        # Input page, then '-' so the image is written to stdout
        command.extend([html_file.name, '-'])
        popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a chatty
        # stderr cannot block the child while we wait on stdout.
        image_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even on failure
        html_file.close()
        os.unlink(html_file.name)

    return image_contents
def overlay_pdfs(overlay, background, overlay_is_template=False, background_is_template=False):
    """
    Overlays one pdf on top of another and returns the resulting PDF contents.
    Each of overlay and background is either raw PDF data or, when the
    matching *_is_template flag is set, the name of a PDF template.
    Raises Exception if a named template cannot be found, and OSError if
    the pdftk executable cannot be started.
    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    temporary_paths = []

    def resolve(pdf, is_template):
        # Turn a template name or raw PDF data into a path on disk
        if is_template:
            location = get_file_location_from_template_name(pdf)
            if not location:
                raise Exception("PDF Template %s does not exist!" % pdf)
            return location
        pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
        temporary_paths.append(pdf_file.name)
        try:
            pdf_file.write(pdf)
        finally:
            pdf_file.close()
        return pdf_file.name

    try:
        overlay_file_name = resolve(overlay, overlay_is_template)
        background_file_name = resolve(background, background_is_template)

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk', overlay_file_name, 'multibackground',
                   background_file_name, 'output', '-']
        popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a chatty
        # stderr cannot block the child while we wait on stdout.
        pdf_contents, _ = popen.communicate()
    finally:
        # Remove every temporary file we created, even when the second
        # input failed to resolve or pdftk could not be started.
        for path in temporary_paths:
            os.unlink(path)

    return pdf_contents
def join_pdfs(*pdfs):
    """
    Joins multiple pdf files, in the order given, and returns the
    resulting PDF contents.
    Each argument may be:
    - an object with a 'name' attribute, read from MEDIA_ROOT/<name>
    - a string ending in '.pdf', looked up as a PDF template
    - anything else, treated as raw PDF data
    Raises Exception if a named template cannot be found, and OSError if
    the pdftk executable cannot be started.
    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    file_names = []
    temporary_paths = []

    try:
        for pdf in pdfs:
            # On Python 3 raw PDF data arrives as bytes, which cannot be
            # compared with the text suffix '.pdf'. On Python 2 bytes is
            # str, so this is False and the suffix check below decides.
            is_raw_bytes = isinstance(pdf, bytes) and not isinstance(pdf, str)

            if hasattr(pdf, 'name'):
                file_names.append(settings.MEDIA_ROOT + '/' + pdf.name)
            elif not is_raw_bytes and pdf.endswith('.pdf'):
                location = get_file_location_from_template_name(pdf)
                if not location:
                    raise Exception("PDF Template %s does not exist!" % pdf)
                file_names.append(location)
            else:
                pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
                temporary_paths.append(pdf_file.name)
                try:
                    pdf_file.write(pdf)
                finally:
                    pdf_file.close()
                file_names.append(pdf_file.name)

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        command = ['pdftk'] + file_names + ['cat', 'output', '-']
        popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a chatty
        # stderr cannot block the child while we wait on stdout.
        pdf_contents, _ = popen.communicate()
    finally:
        # Remove every temporary file we created, even on failure
        for path in temporary_paths:
            os.unlink(path)

    return pdf_contents
@Shmik
Copy link

Shmik commented Apr 12, 2021

Breaking change with wkhtmltopdf 0.12.6

If images from the filesystem are embedded then the updated version of wkhtmltopdf will cause an error unless either the
--enable-local-file-access flag is passed in or --allow {path} is used.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment