-
-
Save WaYdotNET/b73aeb2dd50a9e226407aa2898d11d13 to your computer and use it in GitHub Desktop.
Arbitrary header/footer integration in Weasyprint PDF pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from weasyprint import HTML, CSS | |
class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to integrate nicely
    a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several intermediate steps.
      Here, in this class, we deal mostly with a box representation: 1 `Document` has 1 `Page`
      or more, each `Page` 1 `Box` or more. Each box can contain other boxes. Hence the
      recursive method `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal Weasyprint API
      (`_page_box`, `all_children`, `copy_with_children`, `element_tag`).
    - This generator draws its inspiration and, also a bit of its implementation, from this
      discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
    """

    # Stylesheet used to pre-render the header/footer on their own: no page margin, so the
    # positions measured on the overlay page are relative to the page edges.
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    def __init__(self, main_html, header_html=None, footer_html=None,
                 base_url=None, side_margin=2, extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string) which represents
            the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        base_url: str
            An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
            required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and the footer.
            The goal is to avoid having the content of `main_html` touching the header or the
            footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Pre-render the header or the footer on a margin-less page and measure it.

        Parameters
        ----------
        element: str
            Either 'header' or 'footer'. It selects both the attribute to render
            (`header_html` / `footer_html`) and the tag searched in the rendered boxes.

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a html height

        Raises
        ------
        ValueError
            If `element` is neither 'header' nor 'footer', or if the rendered html does not
            contain a box for the `<header>` / `<footer>` tag.
        """
        # Validate first: any other value used to fail later with an unbound
        # `element_height` (or an AttributeError on the missing `<name>_html` attribute).
        if element not in ('header', 'footer'):
            raise ValueError(
                f"element must be 'header' or 'footer', got {element!r}"
            )

        html = HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        # Only the first page of the overlay document is used.
        element_page = element_doc.pages[0]
        element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
        element_body = element_body.copy_with_children(element_body.all_children())
        element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)
        if element_html is None:
            raise ValueError(
                f'no <{element}> element found in the rendered {element}_html'
            )

        if element == 'header':
            element_height = element_html.height
        else:
            # Footer: space taken from the top of the footer box down to the page bottom.
            element_height = element_page.height - element_html.position_y
        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
        """
        Insert the header and the footer in the main document.

        The children of the pre-rendered header/footer bodies are appended to the `body`
        box of every page of `main_doc`; `main_doc` is modified in place.

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')
            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_pdf(self):
        """
        Render the main html, with the header and the footer overlaid when provided.

        Returns
        -------
        pdf: a bytes sequence
            The rendered PDF.
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element('header')
        else:
            header_body, header_height = None, 0
        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element('footer')
        else:
            footer_body, footer_height = None, 0

        # Reserve room at the top/bottom of each page for the overlays (plus the extra
        # vertical margin) so the main content does not run underneath them.
        margins = '{header_size}px {side_margin} {footer_size}px {side_margin}'.format(
            header_size=header_height + self.extra_vertical_margin,
            footer_size=footer_height + self.extra_vertical_margin,
            side_margin=f'{self.side_margin}cm',
        )
        content_print_layout = '@page {size: A4 portrait; margin: %s;}' % margins

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        pdf = main_doc.write_pdf()
        return pdf

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
        box which is named `element`.

        The search is depth-first, in document order: each box is checked, then its
        descendants, before moving on to the next sibling.

        Look at the notes of the class for more details on Weasyprint insides.

        Parameters
        ----------
        boxes: iterable of boxes
            Each box must expose `element_tag` and `all_children()`.
        element: str
            The tag name to look for (e.g. 'body', 'header', 'footer').

        Returns
        -------
        The first matching box, or None when no box carries that tag.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            # Only stop on an actual match: returning the recursive result
            # unconditionally would abandon the remaining siblings.
            found = PdfGenerator.get_element(box.all_children(), element)
            if found is not None:
                return found
        return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment