-
-
Save nathandem/63f410d501d200cfa5613b38aaf74c44 to your computer and use it in GitHub Desktop.
from weasyprint import HTML, CSS | |
class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to integrate nicely
    a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several intermediate steps.
      Here, in this class, we deal mostly with a box representation: 1 `Document` has 1 `Page`
      or more, each `Page` 1 `Box` or more. Each box can contain other boxes. Hence the
      recursive method `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal Weasyprint API
      (e.g. `Page._page_box`), which may change between Weasyprint releases.
    - This generator draws its inspiration and, also a bit of its implementation, from this
      discussion in the library github issues: https://github.com/Kozea/WeasyPrint/issues/92
    """

    # Page layout used to render the header/footer on their own: full A4 page, no margin.
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    # The only overlay names `_compute_overlay_element` knows how to measure.
    _OVERLAY_ELEMENTS = ('header', 'footer')

    def __init__(self, main_html, header_html=None, footer_html=None,
                 base_url=None, side_margin=2, extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string) which represents
            the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        base_url: str
            An absolute url to the page which serves as a reference to Weasyprint to fetch assets,
            required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and the footer.
            The goal is to avoid having the content of `main_html` touching the header or the
            footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Render the header or the footer on its own and measure the room it needs.

        Parameters
        ----------
        element: str
            Either 'header' or 'footer'. It is both the prefix of the `<element>_html`
            attribute to render and the html tag looked up in the rendered boxes.

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a html height

        Raises
        ------
        ValueError
            If `element` is neither 'header' nor 'footer', or if the rendered html contains
            no `<body>` or no tag named after `element`.
        """
        # Validate first: an unknown name would otherwise leave the height undefined.
        if element not in self._OVERLAY_ELEMENTS:
            raise ValueError(
                f"element must be one of {self._OVERLAY_ELEMENTS}, got {element!r}"
            )

        html = HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render(stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        # Only the first rendered page of the overlay document is used.
        element_page = element_doc.pages[0]

        element_body = PdfGenerator.get_element(element_page._page_box.all_children(), 'body')
        if element_body is None:
            raise ValueError(f'No <body> box found in the rendered {element} html.')
        element_body = element_body.copy_with_children(element_body.all_children())

        element_html = PdfGenerator.get_element(element_page._page_box.all_children(), element)
        if element_html is None:
            raise ValueError(f'No <{element}> box found in the rendered {element} html.')

        if element == 'header':
            element_height = element_html.height
        else:
            # Footer: space between the top of the footer box and the bottom of the page.
            element_height = element_page.height - element_html.position_y

        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None, footer_body=None):
        """
        Insert the header and the footer in the main document.

        The children of the overlay bodies are appended to the `<body>` box of every page
        of `main_doc`, which is modified in place.

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(page._page_box.all_children(), 'body')

            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_pdf(self):
        """
        Render `main_html` with page margins sized to fit the optional header and footer,
        then overlay these on every page.

        Returns
        -------
        pdf: a bytes sequence
            The rendered PDF.
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element('header')
        else:
            header_body, header_height = None, 0
        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element('footer')
        else:
            footer_body, footer_height = None, 0

        # CSS margin shorthand order: top right bottom left.
        margins = '{header_size}px {side_margin} {footer_size}px {side_margin}'.format(
            header_size=header_height + self.extra_vertical_margin,
            footer_size=footer_height + self.extra_vertical_margin,
            side_margin=f'{self.side_margin}cm',
        )
        content_print_layout = '@page {size: A4 portrait; margin: %s;}' % margins

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        pdf = main_doc.write_pdf()

        return pdf

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a DOM-like way, find the
        box which is named `element`.

        The search is depth-first, in document order: each box is checked, then its
        descendants, before moving on to its next sibling.

        Look at the notes of the class for more details on Weasyprint insides.

        Parameters
        ----------
        boxes: iterable of boxes
            Objects exposing an `element_tag` attribute and an `all_children()` method.
        element: str
            The tag name to look for.

        Returns
        -------
        The first matching box, or None when no box in `boxes` or their descendants matches.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            # Only stop when the subtree actually holds a match; otherwise keep
            # looking through the remaining siblings.
            found = PdfGenerator.get_element(box.all_children(), element)
            if found is not None:
                return found
        return None
@nathandem Thanks for this solution. It rocks. Saved a lot of time.
My use-case is slightly different. I have no header for Page 1 and need to add a common header for the subsequent pages. Is there a way to change the margin for the first Page alone?
@nathandem Thanks for this solution. It rocks. Saved a lot of time.
My use-case is slightly different. I have no header for Page 1 and need to add a common header for the subsequent pages. Is there a way to change the margin for the first Page alone?
Hi @mpsiva89, If you have used this class, can you please share an example header html that works with this class? I am having trouble making it work.
Thanks
It's a neat solution, but I'd be wary of using undocumented internal attributes like `_page_box` here, as there is always a chance these might break in the future without anyone mentioning it in the changelog.
I'd also like to point out that, at least as of WeasyPrint 52.5, the `running()` and `content()` values are supported; see https://www.w3.org/TR/css-gcpm-3/#running-syntax for example.
This way you can include your as-complex-as-needed headers and footers right in the main document, select them with CSS and place them in one of the page margins. @schmidtfx With this solution `counter(page)` will also work, as the headers are actually part of the full document.
To skip the first page, you'd just include the header element only in the second page. You can also change headers further down the document simply by inlining another header with the same CSS selector. This is something that's rather hard to do with this solution when dynamic length data is concerned.
I have noticed that WeasyPrint will by default size these according to content, and even setting `width: 100%` or `width: 100vw` will not stretch them to full width - maybe there is no proper parent element? In any case, I worked around that by setting an absolute width equal to the page width; there may also be better ways.
Sadly, no :-(