Last active
April 2, 2025 00:41
-
-
Save mara004/8ef3a803531fdd42b29bbfa2889ff7f3 to your computer and use it in GitHub Desktop.
PDF rendering with Ghostscript (via subprocess)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <[email protected]> | |
# SPDX-FileCopyrightText: 2024 James R. Barlow <[email protected]> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Initial code derived from ocrmypdf/_exec/ghostscript.py | |
# Note that Ghostscript is AGPL-licensed. However, we are calling it via subprocess here, so not sure whether copyleft would actually apply. | |
# See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation | |
import io | |
import os | |
import re | |
import sys | |
import shutil | |
import logging | |
import subprocess | |
import PIL.Image | |
logger = logging.getLogger(__name__) | |
def get_ghostscript():
    """
    Look up the Ghostscript executable on the search path.

    On Windows, ``gswin64c`` is tried first, then ``gswin32c``.
    On every other platform, ``gs`` is looked up.
    Returns whatever :func:`shutil.which` yields for the chosen name, i.e. the
    path to the executable, or None if it was not found.
    """
    # TODO consider searching the Windows registry, as python-ghostscript and ocrmypdf do
    # https://gitlab.com/pdftools/python-ghostscript/-/blob/9f84bf0e02f04eaad4bd998b9c5bef2be55e6389/ghostscript/_gsprint.py#L501
    # https://github.com/jbarlow83/OCRmyPDF/blob/master/src/ocrmypdf/subprocess/_windows.py
    on_windows = sys.platform.startswith('win32')
    if not on_windows:
        return shutil.which('gs')
    # Prefer the 64-bit binary name, fall back to the 32-bit one.
    return shutil.which('gswin64c') or shutil.which('gswin32c')
def _gs_error_reported(stream):
    """
    Tell whether the text *stream* mentions the word "error" (case-insensitive).

    *stream* is the already decoded output text to scan.
    Returns True if a match is found anywhere in the text, False otherwise.
    """
    match = re.search('error', stream, flags=re.IGNORECASE)
    return match is not None
def _gs_rasterise_pdf(
    input_file,
    *,
    pageno,
    raster_dpi,
    password = None,
    raster_device = 'png16m',
):
    """
    Rasterize one page of a PDF at resolution *raster_dpi* by running Ghostscript as a subprocess.

    *input_file* is the path to the PDF (anything accepted by os.fspath()).
    *pageno* is the visual (1-based) page number.
    *raster_dpi* is the resolution, used for both axes; it is rounded to 6 decimal places.
    *password* is handed to Ghostscript as the PDF password if it is not None.
    *raster_device* is the Ghostscript output device name (default: 'png16m').
    Note that Ghostscript takes /UserUnit into account on its own.

    Returns a PIL image opened from the bytes Ghostscript wrote to stdout.
    Raises RuntimeError if Ghostscript cannot be found, or if it exits with a non-zero status
    (in that case its stderr is logged, and the CalledProcessError is attached as the cause).
    """
    raster_dpi = round(raster_dpi, 6)

    gs = get_ghostscript()
    if not gs:
        raise RuntimeError("Ghostscript could not be found. Make sure it is installed and added to $PATH.")

    args_gs = [
        gs,
        '-dQUIET',
        '-dSAFER',
        '-dBATCH',
        '-dNOPAUSE',
        '-dNOPROMPT',
        f'-sDEVICE={raster_device}',
        # Restrict rendering to the single requested page.
        f'-dFirstPage={pageno}',
        f'-dLastPage={pageno}',
        f'-r{raster_dpi:f}x{raster_dpi:f}',
        '-dTextAlphaBits=4',
        '-dGraphicsAlphaBits=4',
        '-dInterpolateControl=-1',
    ]
    if password is not None:
        # NOTE(review): the password travels as a command-line argument, so it may be
        # visible to other local users through the process list - confirm this is acceptable.
        args_gs.append(f'-sPDFPassword={password}')
    args_gs.extend(
        [
            # The rendered image goes to stdout, from where it is read below.
            '-o',
            '-',
            # Presumably keeps Ghostscript's own messages out of the image data on stdout.
            '-sstdout=%stderr',
            '-dAutoRotatePages=/None',
            '-f',
            os.fspath(input_file),
        ]
    )

    try:
        proc = subprocess.run(
            args_gs,
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE,
            check = True,
        )
    except subprocess.CalledProcessError as exc:
        logger.error(exc.stderr.decode(errors='replace'))
        # Chain explicitly so the traceback shows the Ghostscript failure as the direct cause.
        raise RuntimeError('Ghostscript rasterizing failed') from exc

    stderr = proc.stderr.decode(errors='replace')
    if _gs_error_reported(stderr):
        # Ghostscript may exit with status 0 and still have reported an error.
        logger.error(stderr)
    elif not proc.stdout:
        # Without image data, opening the image below will fail; leave a diagnostic first.
        logger.error('Ghostscript exited successfully but produced no image data: %s', stderr)

    return PIL.Image.open( io.BytesIO(proc.stdout) )
def invoke_ghostscript_shell(filepath, index, scale=4, password=None):
    """
    Render one page of the PDF at *filepath* through :func:`_gs_rasterise_pdf`.

    *index* is the 0-based page index; it is converted to a 1-based page number.
    *scale* is multiplied by 72 to obtain the resolution passed on as *raster_dpi*.
    *password* is forwarded unchanged.
    Returns the result of :func:`_gs_rasterise_pdf`.
    """
    # Note, this does not handle rotation yet -> TODO
    page_number = index + 1
    dpi = scale * 72
    return _gs_rasterise_pdf(
        filepath,
        password = password,
        pageno = page_number,
        raster_dpi = dpi,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I started working on this: https://gist.github.com/mara004/428a9aad5d553d4631ab0b5119eb74b2