Last active
June 4, 2020 19:30
-
-
Save justengel/7a1bce955157cf6a11022d8d2ba51df3 to your computer and use it in GitHub Desktop.
Convert pdf file to jpeg.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Convert PDF images to jpg files | |
Requirements: | |
* Install imagemagick http://www.imagemagick.org/download/binaries/ | |
* Install ghostscript https://www.ghostscript.com/download.html | |
* pip install wand | |
""" | |
import os | |
from wand.image import Image | |
__all__ = ['make_new_filename', 'input_files', 'convert'] | |
def make_new_filename(filename, ext):
    """Build a filename with the new extension that does not exist on disk yet.

    The extension of ``filename`` is replaced with ``ext``. If that path is
    already taken, a bracketed counter is inserted before the extension
    (``'name [0].jpeg'``, ``'name [1].jpeg'``, ...) until a free path is found.

    Args:
        filename (str): Original filename.
        ext (str): New filename extension (including the leading dot).

    Returns:
        str: The first candidate path for which ``os.path.exists`` is False.
    """
    stem = os.path.splitext(filename)[0]
    candidate = '{}{}'.format(stem, ext)
    counter = 0
    while True:
        if not os.path.exists(candidate):
            return candidate
        # Path is taken: try the next numbered variant (numbering starts at 0).
        candidate = '{} [{}]{}'.format(stem, counter, ext)
        counter += 1
def input_files(*files, ext='.jpeg', modify_ext=True):
    """Ask the user for filenames and, optionally, a new extension.

    Lines are read from standard input until a blank line or end-of-input is
    reached. When ``modify_ext`` is True, a line of the form ``ext = .png``
    changes the extension instead of being added as a filename.

    Args:
        files (str): Filenames the returned list starts with.
        ext (str)['.jpeg']: New extension for the files.
        modify_ext (bool)[True]: If True the user can change the extension through the input.

    Returns:
        tuple: ``(files, ext)`` where ``files`` is the list of filenames
            (initial ones followed by the entered ones) and ``ext`` is the
            extension that was passed in or entered by the user.
    """
    if modify_ext:
        print('Enter filenames (blank line to stop, "ext = .png" for new extension):')
    else:
        print('Enter filenames (blank line to stop):')

    collected = list(files)
    while True:
        try:
            value = input('').strip()
        except EOFError:
            # Piped/closed stdin: treat end-of-input like a blank line.
            break

        if not value:
            break

        if modify_ext and value.lower().replace(' ', '').startswith('ext='):
            # Only keep the extension itself, without spaces or quotes.
            ext = value.split('=', 1)[-1].strip(' "\'')
        else:
            # Whitespace was stripped above so surrounding quotes (e.g. from a
            # drag-and-dropped path) are actually at the ends of the string.
            collected.append(value.strip('"\''))

    return collected, ext
def convert(*files, ext='.jpeg', new_filename_creator=None):
    """Convert the images in the given pdf files to new image files with the given extension.

    Every entry of each file's ``sequence`` is saved as its own image file.

    Args:
        files (tuple/str): Filenames to convert.
        ext (str)[.jpeg]: Extension to convert the images to.
        new_filename_creator (function/Callable)[make_new_filename]: Function that takes in an
            old filename and extension and returns a new filename to save to.

    Returns:
        new_images (list/str): List of new image filenames that were saved.
    """
    if new_filename_creator is None:
        new_filename_creator = make_new_filename

    new_images = []
    for file in files:
        with Image(filename=file, resolution=120) as source:
            for page in source.sequence:
                # Use the caller-supplied creator (previously ignored in
                # favour of a hard-coded make_new_filename call).
                new_filename = new_filename_creator(file, ext)
                # Close the per-page image as soon as it has been written.
                with Image(page) as page_image:
                    page_image.save(filename=new_filename)
                new_images.append(new_filename)
    return new_images
if __name__ == '__main__':
    # Command line entry point: convert the files given as arguments, or ask
    # for them interactively when none were given.
    import argparse

    # The text is a description of the tool; passed positionally it would be
    # used as the program name in the usage line instead.
    P = argparse.ArgumentParser(description='Extract PDF images')
    P.add_argument('files', nargs='*', type=str, help='Files to convert. Leave empty to enter manually.')
    P.add_argument('-e', '--ext', type=str, default='.jpeg', help='Extension to convert to (".jpeg")')
    ARGS = P.parse_args()

    FILES = list(ARGS.files)
    EXT = ARGS.ext
    if not FILES:
        FILES, EXT = input_files(ext=EXT, modify_ext=True)

    # Run the command
    convert(*FILES, ext=EXT)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment