mara004 · April 2, 2025 00:41 · mara004 · Aug 26, 2024 · mara004 · Feb 9, 2025
diff --git a/ghostscript_shell.py b/ghostscript_shell.py
 # SPDX-FileCopyrightText: 2024 geisserml <[email protected]>
 # SPDX-FileCopyrightText: 2024 James R. Barlow <[email protected]>
 # SPDX-License-Identifier: MPL-2.0

 # Initial code derived from ocrmypdf/_exec/ghostscript.py
 # Note that Ghostscript is AGPL-licensed. However, we are calling it via subprocess here, so not sure whether copyleft would actually apply.
 # See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation

 import io
 import os
 import re
 import sys
 import shutil
 import logging
 import subprocess
 import PIL.Image
 logger = logging.getLogger(__name__)


 def get_ghostscript():
     """
     Look up the Ghostscript executable on the search path via ``shutil.which()``.

     On platforms whose ``sys.platform`` starts with ``win32``, ``gswin64c`` is
     tried first and ``gswin32c`` second. Everywhere else, ``gs`` is looked up.
     Returns the path that was found, or None if there was no match.
     """

     # TODO consider searching the windows registry, as python-ghostscript and ocrmypdf do
     # https://gitlab.com/pdftools/python-ghostscript/-/blob/9f84bf0e02f04eaad4bd998b9c5bef2be55e6389/ghostscript/_gsprint.py#L501
     # https://github.com/jbarlow83/OCRmyPDF/blob/master/src/ocrmypdf/subprocess/_windows.py

     if not sys.platform.startswith('win32'):
         return shutil.which('gs')

     # which() yields None on a miss, so `or` falls through to the second name
     return shutil.which('gswin64c') or shutil.which('gswin32c')


 def _gs_error_reported(stream):
     """Return True if the string *stream* contains the substring "error" in any letter case."""
     hit = re.search('error', stream, flags=re.IGNORECASE)
     return hit is not None

 def _gs_rasterise_pdf(
         input_file,
         *,
         pageno,
         raster_dpi,
         password = None,
         raster_device = 'png16m',
     ):
     """
     Render one page of a PDF to a PIL image by running Ghostscript as a subprocess.

     Parameters:
         input_file: The PDF to render; converted with ``os.fspath()``.
         pageno: Visual (1-based) page number, used as both -dFirstPage and -dLastPage.
         raster_dpi: Resolution applied to both axes, rounded to 6 decimal places.
         password: If not None, passed to Ghostscript through -sPDFPassword.
         raster_device: Name of the Ghostscript output device (-sDEVICE).
     Returns:
         The result of ``PIL.Image.open()`` on the bytes Ghostscript wrote to stdout.
     Raises:
         RuntimeError: If no Ghostscript executable was found, or if the
             Ghostscript process exited with a non-zero status.

     Note that Ghostscript takes /UserUnit into account on its own.
     """

     raster_dpi = round(raster_dpi, 6)

     executable = get_ghostscript()
     if not executable:
         raise RuntimeError("Ghostscript could not be found. Make sure it is installed and added to $PATH.")

     command = [
         executable,
         '-dQUIET',
         '-dSAFER',
         '-dBATCH',
         '-dNOPAUSE',
         '-dNOPROMPT',
         f'-sDEVICE={raster_device}',
         f'-dFirstPage={pageno}',
         f'-dLastPage={pageno}',
         f'-r{raster_dpi:f}x{raster_dpi:f}',
         '-dTextAlphaBits=4',
         '-dGraphicsAlphaBits=4',
         '-dInterpolateControl=-1',
     ]
     if password is not None:
         command.append(f'-sPDFPassword={password}')
     # the image goes to our stdout pipe; Ghostscript's own stdout messages are redirected to stderr
     command += [
         '-o',
         '-',
         '-sstdout=%stderr',
         '-dAutoRotatePages=/None',
         '-f',
         os.fspath(input_file),
     ]

     try:
         completed = subprocess.run(
             command,
             stdout = subprocess.PIPE,
             stderr = subprocess.PIPE,
             check = True,
         )
     except subprocess.CalledProcessError as failure:
         logger.error(failure.stderr.decode(errors='replace'))
         raise RuntimeError('Ghostscript rasterizing failed')

     # a zero exit status may still come with error messages on stderr; log them, but carry on
     diagnostics = completed.stderr.decode(errors='replace')
     if _gs_error_reported(diagnostics):
         logger.error(diagnostics)

     image_data = io.BytesIO(completed.stdout)
     return PIL.Image.open(image_data)


 def invoke_ghostscript_shell(filepath, index, scale=4, password=None):
     """
     Rasterise the page at zero-based *index* of the PDF *filepath* with Ghostscript.

     The resolution handed on is ``scale * 72`` DPI; *password* is forwarded unchanged.
     Returns whatever ``_gs_rasterise_pdf()`` returns.
     """
     # Note, this does not handle rotation yet -> TODO
     page_number = index + 1  # _gs_rasterise_pdf() takes 1-based page numbers
     dpi = scale * 72
     return _gs_rasterise_pdf(filepath, password=password, pageno=page_number, raster_dpi=dpi)
	# SPDX-FileCopyrightText: 2024 geisserml <[email protected]>
	# SPDX-FileCopyrightText: 2024 James R. Barlow <[email protected]>
	# SPDX-License-Identifier: MPL-2.0

	# Initial code derived from ocrmypdf/_exec/ghostscript.py
	# Note that Ghostscript is AGPL-licensed. However, we are calling it via subprocess here, so not sure whether copyleft would actually apply.
	# See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation

	import io
	import os
	import re
	import sys
	import shutil
	import logging
	import subprocess
	import PIL.Image
	logger = logging.getLogger(__name__)


	def get_ghostscript():
	    """
	    Find the Ghostscript executable on the search path.

	    Uses ``shutil.which()``. If ``sys.platform`` starts with ``win32``, the names
	    ``gswin64c`` and then ``gswin32c`` are tried; otherwise ``gs`` is looked up.

	    Returns:
	        The path found by ``shutil.which()``, or None if no name matched.
	    """

	    # TODO consider searching the windows registry, as python-ghostscript and ocrmypdf do
	    # https://gitlab.com/pdftools/python-ghostscript/-/blob/9f84bf0e02f04eaad4bd998b9c5bef2be55e6389/ghostscript/_gsprint.py#L501
	    # https://github.com/jbarlow83/OCRmyPDF/blob/master/src/ocrmypdf/subprocess/_windows.py

	    if sys.platform.startswith('win32'):
	        gs = shutil.which('gswin64c')
	        if not gs:
	            # first name not found, try the second one
	            gs = shutil.which('gswin32c')
	    else:
	        gs = shutil.which('gs')

	    return gs


	def _gs_error_reported(stream):
	    """Tell whether the string *stream* contains "error", matched case-insensitively."""
	    return bool(re.search('error', stream, flags=re.IGNORECASE))

	def _gs_rasterise_pdf(
	        input_file,
	        *,
	        pageno,
	        raster_dpi,
	        password = None,
	        raster_device = 'png16m',
	    ):
	    """
	    Rasterize one page of a PDF at resolution *raster_dpi*.
	    *pageno* is the visual (1-based) page number.
	    Note that Ghostscript takes /UserUnit into account on its own.

	    *input_file* is converted with ``os.fspath()``. If *password* is not None, it is
	    passed as -sPDFPassword. *raster_device* selects the Ghostscript output device.

	    Returns the result of ``PIL.Image.open()`` on the data Ghostscript wrote to stdout.

	    Raises RuntimeError if Ghostscript could not be found, or if the process
	    exited with a non-zero status (stderr is logged first, and the
	    CalledProcessError is attached as the cause).
	    """

	    raster_dpi = round(raster_dpi, 6)
	    gs = get_ghostscript()
	    if not gs:
	        raise RuntimeError("Ghostscript could not be found. Make sure it is installed and added to $PATH.")

	    args_gs = [
	        gs,
	        '-dQUIET',
	        '-dSAFER',
	        '-dBATCH',
	        '-dNOPAUSE',
	        '-dNOPROMPT',
	        f'-sDEVICE={raster_device}',
	        f'-dFirstPage={pageno}',
	        f'-dLastPage={pageno}',
	        f'-r{raster_dpi:f}x{raster_dpi:f}',
	        '-dTextAlphaBits=4',
	        '-dGraphicsAlphaBits=4',
	        '-dInterpolateControl=-1',
	    ]

	    if password is not None:
	        args_gs.append(f'-sPDFPassword={password}')

	    # image data is written to stdout ('-o -'), so Ghostscript's own stdout
	    # messages are sent to stderr to keep the image stream clean
	    args_gs.extend(
	        [
	            '-o',
	            '-',
	            '-sstdout=%stderr',
	            '-dAutoRotatePages=/None',
	            '-f',
	            os.fspath(input_file),
	        ]
	    )

	    try:
	        pipe = subprocess.run(
	            args_gs,
	            stdout = subprocess.PIPE,
	            stderr = subprocess.PIPE,
	            check = True,
	        )
	    except subprocess.CalledProcessError as err:
	        logger.error(err.stderr.decode(errors='replace'))
	        # chain explicitly so the traceback names the failed subprocess call as the cause
	        raise RuntimeError('Ghostscript rasterizing failed') from err
	    else:
	        # exit status was zero, but stderr may still carry error messages: log them
	        stderr = pipe.stderr.decode(errors='replace')
	        if _gs_error_reported(stderr):
	            logger.error(stderr)

	    return PIL.Image.open( io.BytesIO(pipe.stdout) )


	def invoke_ghostscript_shell(filepath, index, scale=4, password=None):
	    """
	    Render the page at zero-based *index* of *filepath* through ``_gs_rasterise_pdf()``.

	    *index* is converted to a 1-based page number, and *scale* is multiplied
	    by 72 to obtain the DPI value. *password* is passed on as is.
	    Returns the value returned by ``_gs_rasterise_pdf()``.
	    """
	    # Note, this does not handle rotation yet -> TODO
	    return _gs_rasterise_pdf(
	        filepath,
	        password   = password,
	        pageno     = index + 1,
	        raster_dpi = scale * 72,
	    )