Skip to content

Instantly share code, notes, and snippets.

@justengel
Last active June 4, 2020 19:30
Show Gist options
  • Save justengel/7a1bce955157cf6a11022d8d2ba51df3 to your computer and use it in GitHub Desktop.
Save justengel/7a1bce955157cf6a11022d8d2ba51df3 to your computer and use it in GitHub Desktop.
Convert pdf file to jpeg.
"""
Convert PDF images to jpg files
Requirements:
* Install imagemagick http://www.imagemagick.org/download/binaries/
* Install ghostscript https://www.ghostscript.com/download.html
* pip install wand
"""
import os
from wand.image import Image
__all__ = ['make_new_filename', 'input_files', 'convert']
def make_new_filename(filename, ext):
    """Make a new filename that does not collide with an existing file.

    The extension of ``filename`` is replaced with ``ext``. If that path already
    exists, a counter is inserted before the extension (``'name [0].jpeg'``,
    ``'name [1].jpeg'``, ...) until a path is found that does not exist.

    Args:
        filename (str): Original filename
        ext (str): New filename extension. A missing leading dot is added, so
            ``'png'`` is treated like ``'.png'``.

    Returns:
        new_filename (str): Filename with the new extension that did not exist
            when it was checked.
    """
    # Without the dot the extension would be glued onto the base name ('filepng').
    if ext and not ext.startswith('.'):
        ext = '.' + ext

    base_name = os.path.splitext(filename)[0]
    new_filename = '{}{}'.format(base_name, ext)
    i = 0
    while os.path.exists(new_filename):
        new_filename = '{} [{}]{}'.format(base_name, i, ext)  # e.g. 'filename [0].jpeg'
        i += 1
    return new_filename
def input_files(*files, ext='.jpeg', modify_ext=True):
    """Ask the user's input for files and a new extension.

    Lines are read with ``input`` until a blank line is entered or the input
    stream ends. Every other line is either an extension change
    (``ext = .png``, only when ``modify_ext`` is True) or a filename.

    Args:
        files (list)[None]: List of string filenames. Entered filenames are appended to these.
        ext (str)['.jpeg']: New extension for the files.
        modify_ext (bool)[True]: If True the user can change the extension through the input.

    Returns:
        files (list): The given filenames followed by the entered filenames.
        ext (str): The extension, possibly changed through the input.
    """
    if modify_ext:
        print('Enter filenames (blank line to stop, "ext = .png" for new extension):')
    else:
        print('Enter filenames (blank line to stop):')

    files = list(files)
    while True:
        try:
            # Strip whitespace first so the quotes of '"a.pdf" ' can be removed below.
            value = input('').strip()
        except EOFError:
            # End of piped/redirected input is treated like the blank stop line.
            break

        if not value:
            break

        if modify_ext and value.lower().replace(' ', '').startswith('ext='):
            # Only get the extension with no quotes
            ext = value.split('=', 1)[-1].strip(' "\'')
        else:
            # Remove quotes from filename
            files.append(value.strip('"\''))
    return files, ext
def convert(*files, ext='.jpeg', new_filename_creator=None):
    """Convert the images in the given pdf files to a new image file with the given extension.

    Every entry of a file's image sequence is saved as its own image file.

    Args:
        files (tuple/str): List of filenames to convert.
        ext (str)[.jpeg]: Extension to convert the images to.
        new_filename_creator (function/Callable)[make_new_filename]: Function that takes in an
            old filename and extension and returns a new filename to save to. It is called once
            for every image, after the previous image has been saved.

    Returns:
        new_images (list/str): List of new image filenames that were saved.
    """
    if new_filename_creator is None:
        new_filename_creator = make_new_filename

    new_images = []
    for file in files:
        with Image(filename=file, resolution=120) as source:
            for image in source.sequence:
                # Use the configured creator; it was previously ignored in favor of
                # a hard-coded make_new_filename call.
                new_filename = new_filename_creator(file, ext)
                # Close the per-image copy as soon as it has been written.
                with Image(image) as page:
                    page.save(filename=new_filename)
                new_images.append(new_filename)
    return new_images
if __name__ == '__main__':
    import argparse

    # ArgumentParser's first positional parameter is `prog`; the text is a
    # description, so it has to be passed by keyword to keep the usage line intact.
    P = argparse.ArgumentParser(description='Extract PDF images')
    P.add_argument('files', nargs='*', type=str, help='Files to convert. Leave empty to enter manually.')
    P.add_argument('-e', '--ext', type=str, default='.jpeg', help='Extension to convert to (".jpeg")')
    ARGS = P.parse_args()

    FILES = list(ARGS.files)
    EXT = ARGS.ext
    if not FILES:
        # No files on the command line: ask for them (and optionally a new extension).
        FILES, EXT = input_files(ext=EXT, modify_ext=True)

    # Run the command
    convert(*FILES, ext=EXT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment