Skip to content

Instantly share code, notes, and snippets.

@fbuchinger
Last active November 27, 2019 09:19
Show Gist options
  • Save fbuchinger/7b8d68e967e8a0600b79 to your computer and use it in GitHub Desktop.
Save fbuchinger/7b8d68e967e8a0600b79 to your computer and use it in GitHub Desktop.
Benchmark comparing exiftool, pyexiftool and multiprocessed pyexiftool processing speed.
import os
import timeit
import time
import multiprocessing
import exiftool
# Directory whose contents are benchmarked: get_filelist() lists every entry
# in it and exiftool_internal_batch() points exiftool at its *.jpg files.
metadatadir = r"E:\dev\exiftool.js\test\sampleImages\Acer"
def exiftool_no_batch():
    """Run exiftool once per file (no batching) and collect each JSON output.

    A separate ``exiftool.exe -j <file>`` process is started for every path
    returned by ``get_filelist()``.

    Returns:
        list of str: the text written to stdout by each invocation, in the
        same order as the file list.

    Raises:
        OSError: if ``exiftool.exe`` cannot be started (the previous
            shell-based call silently produced empty output instead).
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    md = []
    for path in get_filelist():
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact; communicate() drains and closes the pipe.
        proc = subprocess.Popen(
            ["exiftool.exe", "-j", path],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        md.append(proc.communicate()[0])
    return md
def exiftool_internal_batch():
    """Internal batch mode of exiftool - ET itself finds out which files to read.

    A single ``exiftool.exe -j`` process is started with a ``*.jpg`` wildcard
    rooted at ``metadatadir``.

    Returns:
        str: the text written to stdout by that one invocation.

    Raises:
        OSError: if ``exiftool.exe`` cannot be started.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    # os.path.join avoids the doubled backslash the old raw-string pattern
    # produced. No shell is involved, so the wildcard reaches exiftool.exe
    # unexpanded (as it also did via cmd.exe, which does not expand globs).
    pattern = os.path.join(metadatadir, "*.jpg")
    proc = subprocess.Popen(
        ["exiftool.exe", "-j", pattern],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    # communicate() reads all output and closes the pipe.
    return proc.communicate()[0]
def get_filelist():
    """Return a list with the joined path of every entry in ``metadatadir``.

    Entries come back in whatever order ``os.listdir`` reports them; no
    filtering by type or extension is applied.
    """
    return [os.path.join(metadatadir, entry) for entry in os.listdir(metadatadir)]
def exiftool_stay_open(files, mp=None):
    """External batch mode of exiftool, driven through pyexiftool.

    One ``exiftool.ExifTool`` instance is opened as a context manager and
    asked for the metadata of all ``files`` via ``get_metadata_batch``.

    Args:
        files: list of file paths to read.
        mp: optional multiprocessing hook. ``None`` (default) only returns
            the metadata. A queue-like object (anything with a ``put``
            method) additionally receives the metadata. Any other non-None
            value falls back to a module-level ``result_queue``, as the
            original code did.

    Returns:
        Whatever ``get_metadata_batch`` returned for ``files``.

    Raises:
        NameError: if ``mp`` is a plain flag and no module-level
            ``result_queue`` exists.
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch(files)

    if mp is not None:
        # The original always referenced a global ``result_queue`` that this
        # file never defines; prefer a queue handed in by the caller.
        sink = mp if hasattr(mp, "put") else result_queue
        sink.put(metadata)
    return metadata
if __name__ == '__main__':
    # Benchmark driver: times each strategy once and prints the elapsed time.
    # timeit.default_timer replaces time.clock(), which was removed in
    # Python 3.8.
    clock = timeit.default_timer

    # 1) One exiftool process per file.
    no_batch_start = clock()
    exiftool_no_batch()
    no_batch_end = clock()
    print("Exiftool no batch took %s" % (no_batch_end - no_batch_start))

    # 2) One exiftool process given a wildcard.
    internal_batch_start = clock()
    exiftool_internal_batch()
    internal_batch_end = clock()
    print("Exiftool internal batch took %s" % (internal_batch_end - internal_batch_start))

    # 3) One pyexiftool instance fed the whole file list.
    external_batch_start = clock()
    files = get_filelist()
    exiftool_stay_open(files)
    external_batch_end = clock()
    print("Exiftool Stay Open/External batch took %s" % (external_batch_end - external_batch_start))

    # 4) Several pyexiftool instances at once, splitting the load between
    #    worker processes.
    mbs_start = clock()
    num_processes = 2
    # Deal the files round-robin into one sub-list per worker. The original
    # passed ``(files,)`` (a single task holding every file) plus a float
    # chunksize, so nothing ran in parallel. Empty sub-lists are dropped so
    # no worker is asked to process an empty batch.
    chunks = [files[i::num_processes] for i in range(num_processes)]
    chunks = [chunk for chunk in chunks if chunk]
    pool = multiprocessing.Pool(processes=num_processes)  # start worker processes
    try:
        result = pool.map(exiftool_stay_open, chunks)
        # Stop the timer before pool shutdown so only the work is measured.
        mbs_end = clock()
    finally:
        # Release the worker processes even if a task raised.
        pool.close()
        pool.join()
    print("Exiftool multiprocessing batch took %s" % (mbs_end - mbs_start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment