Last active
December 4, 2020 04:32
-
-
Save another-junior-dev/990a4e622868627cb93be3d8fa2eff04 to your computer and use it in GitHub Desktop.
A script to extract images from .zip, .docx, .xlsx, and .pptx files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import os | |
import shutil | |
import logging | |
import argparse | |
import tempfile | |
from pathlib import Path | |
from zipfile import ZipFile | |
# Extensions (without the leading dot) that are treated as images.
IMAGE_EXT = ('png', 'jpeg', 'jpg')


def extract_images(filepath, destination):
    """Extract the images stored inside a zip-based file into *destination*.

    Archive members whose name ends in one of the ``IMAGE_EXT`` extensions
    (compared case-insensitively, with the dot) are unpacked into a temporary
    directory and then copied into *destination*.

    Args:
        filepath: ``pathlib.Path`` of the archive (.zip, .docx, .xlsx, .pptx).
        destination: ``pathlib.Path`` of the directory receiving the images.

    Returns:
        ``[file_count, overall_size]`` on success, where ``overall_size`` is
        the sum of the uncompressed member sizes in bytes. On any failure a
        one-element list ``[filepath.suffix]`` is returned instead; callers
        tell the two outcomes apart by the list length.
    """
    # Require a real extension ('.png', not just a name ending in 'png') and
    # ignore case so members such as 'IMAGE1.PNG' are not silently skipped.
    suffixes = tuple('.' + ext for ext in IMAGE_EXT)

    def is_image(filename):
        return filename.lower().endswith(suffixes)

    try:
        with tempfile.TemporaryDirectory() as working_dir:
            logging.info('Created temporal working directory %s', working_dir)

            with ZipFile(filepath) as working_zip:
                image_list = [name for name in working_zip.namelist() if is_image(name)]
                overall_size = sum(working_zip.getinfo(name).file_size for name in image_list)
                # extract() returns the path it actually wrote, which can
                # differ from the raw member name after sanitisation.
                extracted_paths = [working_zip.extract(name, working_dir) for name in image_list]

            file_count = len(image_list)
            logging.info('Extracted %s images', file_count)

            # NOTE: copying into a directory keeps only the base name, so
            # members sharing a base name overwrite one another.
            for name, extracted in zip(image_list, extracted_paths):
                shutil.copy(extracted, destination)
                logging.info('Copied %s', name)
            logging.info('Copied all image files to %s', destination.resolve())

        return [file_count, overall_size]
    except Exception as e:
        # Deliberately broad: the caller only distinguishes success from
        # failure, and the full traceback is written to the log.
        logging.info('File is a %s', filepath.suffix)
        logging.error('There was an error unzipping the file, make sure it\'s a zipped file (.zip, .docx, .xlsx, .pptx)')
        logging.exception(e)
        # Always a fresh one-element list, so a failure after the counts were
        # computed can never be mistaken for (or mixed with) a success result.
        return [filepath.suffix]
def main(argv=None):
    """Parse the command line, run the extraction and report the outcome.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, as before.

    Returns:
        ``0`` when the images were extracted, ``1`` when the inputs were
        invalid or the extraction failed.
    """
    parser = argparse.ArgumentParser(
        prog='image_mod.py',
        description='Extracts images from zip, docx, xlsx, and pptx files',
    )
    parser.add_argument('filepath')
    parser.add_argument('-d', '--destination', default='.')
    args = parser.parse_args(argv)

    filepath = Path(args.filepath)
    # An empty destination resolves to the current directory instead of
    # making the whole run a silent no-op.
    destination = Path(args.destination)

    if not (filepath.is_file() and destination.is_dir()):
        print('Operation failed. Either the file you provided or the destination directory doesn\'t exist.')
        logging.info('Filename or destination directory doesn\'t exist')
        logging.error('File: {}; Directory: {}'.format(filepath.exists(), destination.exists()))
        return 1

    process = extract_images(filepath, destination)

    # extract_images signals success with exactly [count, size].
    if len(process) == 2:
        print('Operation completed successfully. {} images were extracted ({:,.2f}KB total)'.format(process[0], process[1] / 1024))
        logging.info('Operation successful')
        return 0

    # On failure the file suffix is always the last element of the list.
    print('Operation failed. File type {} not supported.'.format(process[-1]))
    logging.info('Operation failed')
    return 1
if __name__ == "__main__":
    # LOCALAPPDATA is normally only defined on Windows; fall back to the
    # system temporary directory so the script also starts elsewhere.
    log_root = os.environ.get('LOCALAPPDATA') or tempfile.gettempdir()
    logpath = Path(log_root) / 'Programs' / 'image_mod'
    logfile = 'image_mod.log'

    # Create the log directory (and any missing parents) if needed, then
    # configure logging unconditionally so the first run is logged too.
    logpath.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        filename=logpath / logfile,
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    )

    # Propagate main()'s result as the process exit status; a None result
    # still exits with status 0.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment