evanbrumley · March 27, 2014 10:11 · Shmik · Apr 12, 2021
diff --git a/django_pdf_utils.py b/django_pdf_utils.py
 """
 A bunch of PDF utilities for Django, wkhtmltopdf and pdftk

 Note: These have been cobbled together from various projects,
      and probably can't be trusted. Make sure you read over
      and fully understand the code before attempting to use it!
 """

 import sys, os, csv
 import subprocess
 import datetime
 from tempfile import NamedTemporaryFile
 from pipes import quote

 from django.conf import settings
 from django.template import RequestContext
 from django.template.loader import render_to_string
 from django.template import loader
 from django.contrib.sites.models import Site
 from django.core.files.base import ContentFile
 from django.http import HttpResponse



 def get_file_location_from_template_name(template_name):
    """
    Locate a template file on disk.

    Joins template_name onto each directory in settings.TEMPLATE_DIRS, in
    order, and returns the first resulting path that exists. Returns None
    when no directory contains the template.
    """
    candidates = (
        os.path.join(directory, template_name)
        for directory in getattr(settings, 'TEMPLATE_DIRS')
    )
    return next((path for path in candidates if os.path.exists(path)), None)


 def render_pdf(file_contents, file_name='output.pdf'):
    """
    Wrap PDF data in an HttpResponse served as application/pdf.

    file_contents is written to the response body as-is. file_name is used
    in the Content-Disposition header; '.pdf' is appended when the name does
    not already end with that suffix.
    """
    if not file_name.endswith('.pdf'):
        file_name = file_name + '.pdf'

    # content_type is the supported keyword; the old mimetype alias was
    # removed in Django 1.7 and raises TypeError there.
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)

    return response


 def render_png(file_contents, file_name='output.png'):
    """
    Wrap PNG data in an HttpResponse served as image/png.

    file_contents is written to the response body as-is. file_name is used
    in the Content-Disposition header; '.png' is appended when the name does
    not already end with that suffix.
    """
    if not file_name.endswith('.png'):
        file_name = file_name + '.png'

    # content_type is the supported keyword; the old mimetype alias was
    # removed in Django 1.7 and raises TypeError there.
    response = HttpResponse(content_type='image/png')
    response['Content-Disposition'] = 'filename=%s' % file_name
    response.write(file_contents)

    return response


 def process_value_for_fdf(val):
    """
    Due to a weird bug in PDFTK 1.44, we can't handle parens
    in pdf forms. For the moment, we just replace them with
    square brackets.

    Falsy values and values that are not text (booleans, numbers, ...) are
    returned unchanged, since they have no parentheses to replace.
    """
    # str plus Python 2's unicode; on Python 3 both entries are str.
    text_types = (str, type(u''))

    if not val or not isinstance(val, text_types):
        return val

    return val.replace("(", "[").replace(")", "]")


 def create_pdf_from_pdf(template_name, context, flatten=True):
    """
    Given a PDF template and context returns the filled-in PDF contents.
    The PDF should contain a form, and the context should refer to fields in that form.

    template_name is resolved with get_file_location_from_template_name().
    Each context value is passed through process_value_for_fdf() and the
    resulting fields are written to a temporary FDF file, which pdftk merges
    into the template with fill_form. When flatten is true, pdftk's
    'flatten' option is appended to the command.

    Returns whatever pdftk writes to stdout. pdftk's exit status is not
    checked. Raises Exception when the template cannot be found.

    Prerequisites: fdfgen (available on PyPI) and pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    from fdfgen import forge_fdf

    template_location = get_file_location_from_template_name(template_name)

    if not template_location:
        raise Exception("PDF Template %s does not exist!" % template_name)

    # items() exists on both Python 2 and 3 mappings, unlike iteritems().
    fields = [(key, process_value_for_fdf(val)) for key, val in context.items()]
    fdf = forge_fdf("", fields, [], [], [])

    fdf_file = NamedTemporaryFile(delete=False, suffix='.fdf')
    try:
        fdf_file.write(fdf)
        fdf_file.close()

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters from being split or interpreted.
        command = ['pdftk', template_location, 'fill_form', fdf_file.name,
                   'output', '-']
        if flatten:
            command.append('flatten')

        popen = subprocess.Popen(command, bufsize=4096,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a child that
        # writes a lot to stderr cannot block on a full pipe.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary FDF file, even when something failed.
        fdf_file.close()
        os.unlink(fdf_file.name)

    return pdf_contents


 def create_pdf_from_html(template_name, context, toc=False, include_footer=False,
                           margin_bottom=0, margin_top=0, margin_left=0,
                           margin_right=0, javascript_delay=0, zoom=1.0, page_size='A4'):
    """
    Given a template and context, returns the contents of a PDF rendered
    from that template.

    The template is rendered with render_to_string() after setting
    context['base_url'] to a file:// URL for settings.STATIC_ROOT (note that
    this modifies the context passed in). src attributes starting with
    settings.MEDIA_URL are rewritten to file:// paths under
    settings.MEDIA_ROOT, and the HTML is encoded to ASCII using XML character
    references. The result is written to a temporary file and handed to
    wkhtmltopdf.

    Margins and zoom are only passed on when they are int or float values;
    page_size is passed on when truthy. toc adds wkhtmltopdf's 'toc' object
    and include_footer adds a page-number / generation-date footer.

    Returns whatever wkhtmltopdf writes to stdout; its exit status is not
    checked.

    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
    """

    def is_number(value):
        # bool is a subclass of int but is not a meaningful measurement.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)

    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)

    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # The command is built as an argument list and run without a shell, so
    # no quoting of individual values is needed.
    command = ['wkhtmltopdf']

    margins = (
        ('--margin-bottom', margin_bottom),
        ('--margin-top', margin_top),
        ('--margin-left', margin_left),
        ('--margin-right', margin_right),
    )
    for flag, value in margins:
        if is_number(value):
            command += [flag, str(value)]

    command += ['--javascript-delay', str(javascript_delay)]

    if page_size:
        command += ['--page-size', str(page_size)]

    if is_number(zoom):
        command += ['--zoom', str(zoom)]

    if include_footer:
        left_footer = "Page [page] of [toPage]"

        right_footer = "PDF Generated on %s" % (
            datetime.datetime.today().strftime("%b %d %Y"),
        )

        # Remove square brackets so wkhtml doesn't get confuzzled
        right_footer = right_footer.replace('[', '').replace(']', '')

        command += ['--footer-left', left_footer,
                    '--footer-right', right_footer]

    if toc:
        command.append('toc')

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()

        # Input page first, then '-' so the PDF is written to stdout.
        command += [html_file.name, '-']

        popen = subprocess.Popen(command, bufsize=4096,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a child that
        # writes a lot to stderr cannot block on a full pipe.
        pdf_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even when something failed.
        html_file.close()
        os.unlink(html_file.name)

    return pdf_contents


 def create_png_from_html(template_name, context, width=None, height=None, crop_h=None, crop_w=None, crop_x=None, crop_y=None, javascript_delay=0):
    """
    Given a template and context, returns the contents of a PNG rendered
    from that template.

    The template is rendered with render_to_string() after setting
    context['base_url'] to a file:// URL for settings.STATIC_ROOT (note that
    this modifies the context passed in). src attributes starting with
    settings.MEDIA_URL are rewritten to file:// paths under
    settings.MEDIA_ROOT, and the HTML is encoded to ASCII using XML character
    references. The result is written to a temporary file and handed to
    wkhtmltoimage.

    width, height and the crop_* values are only passed on when they are
    int or float values.

    Returns whatever wkhtmltoimage writes to stdout; its exit status is not
    checked.

    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
    """

    def is_number(value):
        # bool is a subclass of int but is not a meaningful measurement.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
    rendered = render_to_string(template_name, context)

    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)

    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

    # The command is built as an argument list and run without a shell, so
    # no quoting of individual values is needed.
    command = ['wkhtmltoimage', '--format', 'png',
               '--javascript-delay', str(javascript_delay)]

    optional_numbers = (
        ('--crop-h', crop_h),
        ('--crop-w', crop_w),
        ('--crop-x', crop_x),
        ('--crop-y', crop_y),
        ('--width', width),
        ('--height', height),
    )
    for flag, value in optional_numbers:
        if is_number(value):
            command += [flag, str(value)]

    html_file = NamedTemporaryFile(delete=False, suffix='.html')
    try:
        html_file.write(rendered)
        html_file.close()

        # Input page first, then '-' so the image is written to stdout.
        command += [html_file.name, '-']

        popen = subprocess.Popen(command, bufsize=4096,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a child that
        # writes a lot to stderr cannot block on a full pipe.
        image_contents, _ = popen.communicate()
    finally:
        # Always remove the temporary HTML file, even when something failed.
        html_file.close()
        os.unlink(html_file.name)

    return image_contents


 def overlay_pdfs(overlay, background, overlay_is_template=False, background_is_template=False):
    """
    Overlays one pdf on top of another.

    Each of overlay and background is either a template name (when the
    matching *_is_template flag is true), resolved with
    get_file_location_from_template_name(), or raw PDF contents, which are
    written to a temporary file. The two files are combined by running
    'pdftk <overlay> multibackground <background> output -'.

    Returns whatever pdftk writes to stdout; its exit status is not checked.
    Raises Exception when a template cannot be found.

    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    temporary_paths = []

    def resolve(pdf, is_template):
        # Return a path on disk for either a template name or raw contents.
        if is_template:
            location = get_file_location_from_template_name(pdf)
            if not location:
                raise Exception("PDF Template %s does not exist!" % pdf)
            return location

        pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
        # Record the path before writing so it is cleaned up on any failure.
        temporary_paths.append(pdf_file.name)
        try:
            pdf_file.write(pdf)
        finally:
            pdf_file.close()
        return pdf_file.name

    try:
        overlay_file_name = resolve(overlay, overlay_is_template)
        background_file_name = resolve(background, background_is_template)

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters from being split or interpreted.
        command = ['pdftk', overlay_file_name, 'multibackground',
                   background_file_name, 'output', '-']

        popen = subprocess.Popen(command, bufsize=4096,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a child that
        # writes a lot to stderr cannot block on a full pipe.
        pdf_contents, _ = popen.communicate()
    finally:
        # Remove every temporary file created above, even on failure.
        for path in temporary_paths:
            os.unlink(path)

    return pdf_contents


 def join_pdfs(*pdfs):
    """
    Joins multiple pdf files

    Each argument may be:
      * an object with a 'name' attribute, taken to be a file stored at
        settings.MEDIA_ROOT + '/' + name;
      * a string ending in '.pdf', treated as a template name and resolved
        with get_file_location_from_template_name();
      * anything else, treated as raw PDF contents and written to a
        temporary file.

    The files are concatenated in argument order with 'pdftk ... cat'.
    Returns whatever pdftk writes to stdout; its exit status is not checked.
    Raises Exception when a template cannot be found.

    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
    """
    file_names = []
    temporary_paths = []

    try:
        for pdf in pdfs:
            if hasattr(pdf, 'name'):
                file_names.append(settings.MEDIA_ROOT + '/' + pdf.name)
                continue

            try:
                is_template_name = pdf.endswith('.pdf')
            except TypeError:
                # Python 3 bytes reject a str suffix; bytes are raw contents.
                is_template_name = False

            if is_template_name:
                location = get_file_location_from_template_name(pdf)
                if not location:
                    raise Exception("PDF Template %s does not exist!" % pdf)
                file_names.append(location)
            else:
                pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
                # Record the path before writing so it is always cleaned up.
                temporary_paths.append(pdf_file.name)
                try:
                    pdf_file.write(pdf)
                finally:
                    pdf_file.close()
                file_names.append(pdf_file.name)

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters from being split or interpreted.
        command = ['pdftk'] + file_names + ['cat', 'output', '-']

        popen = subprocess.Popen(command, bufsize=4096,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains stdout and stderr together, so a child that
        # writes a lot to stderr cannot block on a full pipe.
        pdf_contents, _ = popen.communicate()
    finally:
        # Remove every temporary file created above, even on failure.
        for path in temporary_paths:
            os.unlink(path)

    return pdf_contents
	"""
	A bunch of PDF utilities for Django, wkhtmltopdf and pdftk

	Note: These have been cobbled together from various projects,
	and probably can't be trusted. Make sure you read over
	and fully understand the code before attempting to use it!
	"""

	import sys, os, csv
	import subprocess
	import datetime
	from tempfile import NamedTemporaryFile
	from pipes import quote

	from django.conf import settings
	from django.template import RequestContext
	from django.template.loader import render_to_string
	from django.template import loader
	from django.contrib.sites.models import Site
	from django.core.files.base import ContentFile
	from django.http import HttpResponse



	def get_file_location_from_template_name(template_name):
	    """
	    Locate a template file on disk.

	    Joins template_name onto each directory in settings.TEMPLATE_DIRS, in
	    order, and returns the first resulting path that exists. Returns None
	    when no directory contains the template.
	    """
	    template_dirs = getattr(settings, 'TEMPLATE_DIRS')
	    for template_dir in template_dirs:
	        location = os.path.join(template_dir, template_name)
	        if os.path.exists(location):
	            return location

	    return None


	def render_pdf(file_contents, file_name='output.pdf'):
	    """
	    Wrap PDF data in an HttpResponse served as application/pdf.

	    file_contents is written to the response body as-is. file_name is used
	    in the Content-Disposition header; '.pdf' is appended when the name does
	    not already end with that suffix.
	    """
	    if not file_name.endswith('.pdf'):
	        file_name = file_name + '.pdf'

	    # content_type is the supported keyword; the old mimetype alias was
	    # removed in Django 1.7 and raises TypeError there.
	    response = HttpResponse(content_type='application/pdf')
	    response['Content-Disposition'] = 'filename=%s' % file_name
	    response.write(file_contents)

	    return response


	def render_png(file_contents, file_name='output.png'):
	    """
	    Wrap PNG data in an HttpResponse served as image/png.

	    file_contents is written to the response body as-is. file_name is used
	    in the Content-Disposition header; '.png' is appended when the name does
	    not already end with that suffix.
	    """
	    if not file_name.endswith('.png'):
	        file_name = file_name + '.png'

	    # content_type is the supported keyword; the old mimetype alias was
	    # removed in Django 1.7 and raises TypeError there.
	    response = HttpResponse(content_type='image/png')
	    response['Content-Disposition'] = 'filename=%s' % file_name
	    response.write(file_contents)

	    return response


	def process_value_for_fdf(val):
	    """
	    Due to a weird bug in PDFTK 1.44, we can't handle parens
	    in pdf forms. For the moment, we just replace them with
	    square brackets.

	    Falsy values and values that are not text (booleans, numbers, ...) are
	    returned unchanged, since they have no parentheses to replace.
	    """
	    # str plus Python 2's unicode; on Python 3 both entries are str.
	    text_types = (str, type(u''))

	    if not val or not isinstance(val, text_types):
	        return val

	    return val.replace("(", "[").replace(")", "]")


	def create_pdf_from_pdf(template_name, context, flatten=True):
	    """
	    Given a PDF template and context returns the filled-in PDF contents.
	    The PDF should contain a form, and the context should refer to fields in that form.

	    template_name is resolved with get_file_location_from_template_name().
	    Each context value is passed through process_value_for_fdf() and the
	    resulting fields are written to a temporary FDF file, which pdftk merges
	    into the template with fill_form. When flatten is true, pdftk's
	    'flatten' option is appended to the command.

	    Returns whatever pdftk writes to stdout. pdftk's exit status is not
	    checked. Raises Exception when the template cannot be found.

	    Prerequisites: fdfgen (available on PyPI) and pdftk (http://www.pdflabs.com/docs/install-pdftk/)
	    """
	    from fdfgen import forge_fdf

	    template_location = get_file_location_from_template_name(template_name)

	    if not template_location:
	        raise Exception("PDF Template %s does not exist!" % template_name)

	    # items() exists on both Python 2 and 3 mappings, unlike iteritems().
	    fields = [(key, process_value_for_fdf(val)) for key, val in context.items()]
	    fdf = forge_fdf("", fields, [], [], [])

	    fdf_file = NamedTemporaryFile(delete=False, suffix='.fdf')
	    try:
	        fdf_file.write(fdf)
	        fdf_file.close()

	        # An argument list (no shell) keeps paths containing spaces or shell
	        # metacharacters from being split or interpreted.
	        command = ['pdftk', template_location, 'fill_form', fdf_file.name,
	                   'output', '-']
	        if flatten:
	            command.append('flatten')

	        popen = subprocess.Popen(command, bufsize=4096,
	                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        # communicate() drains stdout and stderr together, so a child that
	        # writes a lot to stderr cannot block on a full pipe.
	        pdf_contents, _ = popen.communicate()
	    finally:
	        # Always remove the temporary FDF file, even when something failed.
	        fdf_file.close()
	        os.unlink(fdf_file.name)

	    return pdf_contents


	def create_pdf_from_html(template_name, context, toc=False, include_footer=False,
	                         margin_bottom=0, margin_top=0, margin_left=0,
	                         margin_right=0, javascript_delay=0, zoom=1.0, page_size='A4'):
	    """
	    Given a template and context, returns the contents of a PDF rendered
	    from that template.

	    The template is rendered with render_to_string() after setting
	    context['base_url'] to a file:// URL for settings.STATIC_ROOT (note that
	    this modifies the context passed in). src attributes starting with
	    settings.MEDIA_URL are rewritten to file:// paths under
	    settings.MEDIA_ROOT, and the HTML is encoded to ASCII using XML character
	    references. The result is written to a temporary file and handed to
	    wkhtmltopdf.

	    Margins and zoom are only passed on when they are int or float values;
	    page_size is passed on when truthy. toc adds wkhtmltopdf's 'toc' object
	    and include_footer adds a page-number / generation-date footer.

	    Returns whatever wkhtmltopdf writes to stdout; its exit status is not
	    checked.

	    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
	    """

	    def is_number(value):
	        # bool is a subclass of int but is not a meaningful measurement.
	        return isinstance(value, (int, float)) and not isinstance(value, bool)

	    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
	    rendered = render_to_string(template_name, context)

	    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)

	    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

	    # The command is built as an argument list and run without a shell, so
	    # no quoting of individual values is needed.
	    command = ['wkhtmltopdf']

	    margins = (
	        ('--margin-bottom', margin_bottom),
	        ('--margin-top', margin_top),
	        ('--margin-left', margin_left),
	        ('--margin-right', margin_right),
	    )
	    for flag, value in margins:
	        if is_number(value):
	            command += [flag, str(value)]

	    command += ['--javascript-delay', str(javascript_delay)]

	    if page_size:
	        command += ['--page-size', str(page_size)]

	    if is_number(zoom):
	        command += ['--zoom', str(zoom)]

	    if include_footer:
	        left_footer = "Page [page] of [toPage]"

	        right_footer = "PDF Generated on %s" % (
	            datetime.datetime.today().strftime("%b %d %Y"),
	        )

	        # Remove square brackets so wkhtml doesn't get confuzzled
	        right_footer = right_footer.replace('[', '').replace(']', '')

	        command += ['--footer-left', left_footer,
	                    '--footer-right', right_footer]

	    if toc:
	        command.append('toc')

	    html_file = NamedTemporaryFile(delete=False, suffix='.html')
	    try:
	        html_file.write(rendered)
	        html_file.close()

	        # Input page first, then '-' so the PDF is written to stdout.
	        command += [html_file.name, '-']

	        popen = subprocess.Popen(command, bufsize=4096,
	                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        # communicate() drains stdout and stderr together, so a child that
	        # writes a lot to stderr cannot block on a full pipe.
	        pdf_contents, _ = popen.communicate()
	    finally:
	        # Always remove the temporary HTML file, even when something failed.
	        html_file.close()
	        os.unlink(html_file.name)

	    return pdf_contents


	def create_png_from_html(template_name, context, width=None, height=None, crop_h=None, crop_w=None, crop_x=None, crop_y=None, javascript_delay=0):
	    """
	    Given a template and context, returns the contents of a PNG rendered
	    from that template.

	    The template is rendered with render_to_string() after setting
	    context['base_url'] to a file:// URL for settings.STATIC_ROOT (note that
	    this modifies the context passed in). src attributes starting with
	    settings.MEDIA_URL are rewritten to file:// paths under
	    settings.MEDIA_ROOT, and the HTML is encoded to ASCII using XML character
	    references. The result is written to a temporary file and handed to
	    wkhtmltoimage.

	    width, height and the crop_* values are only passed on when they are
	    int or float values.

	    Returns whatever wkhtmltoimage writes to stdout; its exit status is not
	    checked.

	    Prerequisites: wkhtmltopdf (http://wkhtmltopdf.org/)
	    """

	    def is_number(value):
	        # bool is a subclass of int but is not a meaningful measurement.
	        return isinstance(value, (int, float)) and not isinstance(value, bool)

	    context['base_url'] = 'file:///' + settings.STATIC_ROOT + '/'
	    rendered = render_to_string(template_name, context)

	    rendered = rendered.replace('src="' + settings.MEDIA_URL, 'src="file:///' + settings.MEDIA_ROOT)

	    rendered = rendered.encode('ascii', 'xmlcharrefreplace')

	    # The command is built as an argument list and run without a shell, so
	    # no quoting of individual values is needed.
	    command = ['wkhtmltoimage', '--format', 'png',
	               '--javascript-delay', str(javascript_delay)]

	    optional_numbers = (
	        ('--crop-h', crop_h),
	        ('--crop-w', crop_w),
	        ('--crop-x', crop_x),
	        ('--crop-y', crop_y),
	        ('--width', width),
	        ('--height', height),
	    )
	    for flag, value in optional_numbers:
	        if is_number(value):
	            command += [flag, str(value)]

	    html_file = NamedTemporaryFile(delete=False, suffix='.html')
	    try:
	        html_file.write(rendered)
	        html_file.close()

	        # Input page first, then '-' so the image is written to stdout.
	        command += [html_file.name, '-']

	        popen = subprocess.Popen(command, bufsize=4096,
	                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        # communicate() drains stdout and stderr together, so a child that
	        # writes a lot to stderr cannot block on a full pipe.
	        image_contents, _ = popen.communicate()
	    finally:
	        # Always remove the temporary HTML file, even when something failed.
	        html_file.close()
	        os.unlink(html_file.name)

	    return image_contents


	def overlay_pdfs(overlay, background, overlay_is_template=False, background_is_template=False):
	    """
	    Overlays one pdf on top of another.

	    Each of overlay and background is either a template name (when the
	    matching *_is_template flag is true), resolved with
	    get_file_location_from_template_name(), or raw PDF contents, which are
	    written to a temporary file. The two files are combined by running
	    'pdftk <overlay> multibackground <background> output -'.

	    Returns whatever pdftk writes to stdout; its exit status is not checked.
	    Raises Exception when a template cannot be found.

	    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
	    """
	    temporary_paths = []

	    def resolve(pdf, is_template):
	        # Return a path on disk for either a template name or raw contents.
	        if is_template:
	            location = get_file_location_from_template_name(pdf)
	            if not location:
	                raise Exception("PDF Template %s does not exist!" % pdf)
	            return location

	        pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
	        # Record the path before writing so it is cleaned up on any failure.
	        temporary_paths.append(pdf_file.name)
	        try:
	            pdf_file.write(pdf)
	        finally:
	            pdf_file.close()
	        return pdf_file.name

	    try:
	        overlay_file_name = resolve(overlay, overlay_is_template)
	        background_file_name = resolve(background, background_is_template)

	        # An argument list (no shell) keeps paths containing spaces or shell
	        # metacharacters from being split or interpreted.
	        command = ['pdftk', overlay_file_name, 'multibackground',
	                   background_file_name, 'output', '-']

	        popen = subprocess.Popen(command, bufsize=4096,
	                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        # communicate() drains stdout and stderr together, so a child that
	        # writes a lot to stderr cannot block on a full pipe.
	        pdf_contents, _ = popen.communicate()
	    finally:
	        # Remove every temporary file created above, even on failure.
	        for path in temporary_paths:
	            os.unlink(path)

	    return pdf_contents


	def join_pdfs(*pdfs):
	    """
	    Joins multiple pdf files

	    Each argument may be:
	      * an object with a 'name' attribute, taken to be a file stored at
	        settings.MEDIA_ROOT + '/' + name;
	      * a string ending in '.pdf', treated as a template name and resolved
	        with get_file_location_from_template_name();
	      * anything else, treated as raw PDF contents and written to a
	        temporary file.

	    The files are concatenated in argument order with 'pdftk ... cat'.
	    Returns whatever pdftk writes to stdout; its exit status is not checked.
	    Raises Exception when a template cannot be found.

	    Prerequisites: pdftk (http://www.pdflabs.com/docs/install-pdftk/)
	    """
	    file_names = []
	    temporary_paths = []

	    try:
	        for pdf in pdfs:
	            if hasattr(pdf, 'name'):
	                file_names.append(settings.MEDIA_ROOT + '/' + pdf.name)
	                continue

	            try:
	                is_template_name = pdf.endswith('.pdf')
	            except TypeError:
	                # Python 3 bytes reject a str suffix; bytes are raw contents.
	                is_template_name = False

	            if is_template_name:
	                location = get_file_location_from_template_name(pdf)
	                if not location:
	                    raise Exception("PDF Template %s does not exist!" % pdf)
	                file_names.append(location)
	            else:
	                pdf_file = NamedTemporaryFile(delete=False, suffix='.pdf')
	                # Record the path before writing so it is always cleaned up.
	                temporary_paths.append(pdf_file.name)
	                try:
	                    pdf_file.write(pdf)
	                finally:
	                    pdf_file.close()
	                file_names.append(pdf_file.name)

	        # An argument list (no shell) keeps paths containing spaces or shell
	        # metacharacters from being split or interpreted.
	        command = ['pdftk'] + file_names + ['cat', 'output', '-']

	        popen = subprocess.Popen(command, bufsize=4096,
	                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        # communicate() drains stdout and stderr together, so a child that
	        # writes a lot to stderr cannot block on a full pipe.
	        pdf_contents, _ = popen.communicate()
	    finally:
	        # Remove every temporary file created above, even on failure.
	        for path in temporary_paths:
	            os.unlink(path)

	    return pdf_contents