Last active
November 27, 2019 09:19
-
-
Save fbuchinger/7b8d68e967e8a0600b79 to your computer and use it in GitHub Desktop.
Benchmark comparing exiftool, pyexiftool and multiprocessed pyexiftool processing speed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing
import os
import subprocess
import time
import timeit

import exiftool
# Directory holding the sample images that every benchmark variant reads.
# NOTE(review): hard-coded, machine-specific Windows path -- adjust it before
# running the benchmark on another machine.
metadatadir = r"E:\dev\exiftool.js\test\sampleImages\Acer"
def exiftool_no_batch():
    """Run one exiftool process per file and collect each JSON output.

    This is the un-batched baseline: a new ``exiftool.exe -j`` process is
    spawned for every path returned by get_filelist().

    Returns:
        list of str: the text written to stdout by each exiftool run, in
        the order of get_filelist().
    """
    md = []
    for path in get_filelist():
        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach exiftool intact.
        proc = subprocess.Popen(
            ["exiftool.exe", "-j", path],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        # communicate() reads stdout to EOF, waits for the child and closes
        # the pipe, so no handle is leaked per file.
        md.append(proc.communicate()[0])
    return md
def exiftool_internal_batch():
    """Run a single exiftool process over every ``*.jpg`` in metadatadir.

    Internal batch mode: exiftool is handed a wildcard and works out by
    itself which files to read.

    Returns:
        str: the text written to stdout by the single ``exiftool.exe -j`` run.
    """
    # os.path.join yields exactly one separator before the wildcard (the old
    # raw-string "\\" produced two).
    pattern = os.path.join(metadatadir, "*.jpg")
    # No shell is involved, so the wildcard reaches exiftool verbatim.
    # NOTE(review): this relies on exiftool.exe expanding wildcards itself on
    # Windows, as the previous cmd.exe-based call presumably did -- confirm.
    proc = subprocess.Popen(
        ["exiftool.exe", "-j", pattern],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    # communicate() drains stdout, waits for the child and closes the pipe.
    return proc.communicate()[0]
def get_filelist():
    """Return the full path of every entry found in metadatadir.

    Entries are not filtered: each name reported by os.listdir() is joined
    onto metadatadir, in the order os.listdir() returned it.
    """
    return [
        os.path.join(metadatadir, entry)
        for entry in os.listdir(metadatadir)
    ]
def exiftool_stay_open(files, mp=None):
    """Read metadata for *files* through one pyexiftool ExifTool instance.

    External batch mode: the complete file list is handed to a single
    ``exiftool.ExifTool`` context via ``get_metadata_batch``.

    Args:
        files: list of files to read.
        mp: None (the default) to only return the metadata. Pass a
            queue-like object (anything with a ``put`` method) to also have
            the metadata put on it. Any other non-None value keeps the
            legacy behaviour of using a module-level ``result_queue``.

    Returns:
        Whatever ``get_metadata_batch`` returned for *files*.
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch(files)
    if mp is not None:
        # ``result_queue`` is not defined in the visible part of this file,
        # so the legacy path raises NameError unless that global is provided
        # elsewhere. Accepting the queue through ``mp`` removes the hidden
        # dependency while staying compatible with existing callers.
        queue = mp if hasattr(mp, "put") else result_queue
        queue.put(metadata)
    return metadata
if __name__ == '__main__':
    # Benchmark driver: times each exiftool invocation strategy once and
    # prints the elapsed time.
    #
    # timeit.default_timer is used instead of time.clock(): time.clock() was
    # removed in Python 3.8, and on Unix it reported CPU time of this process
    # only, which does not include the time spent in exiftool children.
    timer = timeit.default_timer

    no_batch_start = timer()
    exiftool_no_batch()
    no_batch_end = timer()
    print("Exiftool no batch took %s" % (no_batch_end - no_batch_start))

    internal_batch_start = timer()
    exiftool_internal_batch()
    internal_batch_end = timer()
    print("Exiftool internal batch took %s" % (internal_batch_end - internal_batch_start))

    external_batch_start = timer()
    files = get_filelist()
    exiftool_stay_open(files)
    external_batch_end = timer()
    print("Exiftool Stay Open/External batch took %s" % (external_batch_end - external_batch_start))

    # Run several exiftool stay_open instances at once (one per worker
    # process) and split the file list between them.
    mbs_start = timer()
    num_processes = 2
    # Ceiling division gives at most num_processes slices; max(1, ...) keeps
    # the range() step valid when the file list is empty.
    chunk_size = max(1, -(-len(files) // num_processes))
    # Each slice becomes one task. Mapping over ``(files,)`` submitted the
    # whole list as a single task, so only one worker ever did any work.
    chunks = [files[i:i + chunk_size] for i in range(0, len(files), chunk_size)]
    pool = multiprocessing.Pool(processes=num_processes)  # start worker processes
    try:
        result = pool.map(exiftool_stay_open, chunks)
        # Stop the clock before teardown so the measurement covers the same
        # span as before (pool start-up plus the map call).
        mbs_end = timer()
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.close()
        pool.join()
    print("Exiftool multiprocessing batch took %s" % (mbs_end - mbs_start))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment