Last active
December 21, 2015 10:49
-
-
Save stuarteberg/6294491 to your computer and use it in GitHub Desktop.
Benchmarking alternate implementations for relabeling a label image with a mapping specified as a dict.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
# Shared benchmark inputs.  Every using_*() function below reads these two
# module-level names at call time; the script further down rebinds them
# before each round of measurements.
mapping = None          # dict: original label -> new label
original_labels = None  # numpy label image to be relabeled

# TODO: Ensure that every label in original_labels is a key of mapping.
#       None of the functions below handle a missing label: the dict-based
#       ones raise KeyError, and using_index_array() either raises IndexError
#       or silently returns the value of a neighbouring key.
def using_index_array():
    """Relabel the global ``original_labels`` with a rank-indexed lookup table.

    The keys of the global ``mapping`` are sorted once.  Each label is turned
    into its position within the sorted keys via ``numpy.searchsorted``, and
    that position indexes an array holding the mapped values in the same
    order.

    Returns:
        An array with the shape of ``original_labels`` holding the mapped
        values.

    Note:
        Every label must be a key of ``mapping``.  A missing label is not
        detected: ``searchsorted`` returns its insertion point, so the result
        is either the value of a neighbouring key or an ``IndexError`` when
        the label is larger than every key.
    """
    # Iterating a dict yields its keys on both Python 2 and Python 3, so no
    # iterkeys()/iteritems() is needed, and the sort only has to happen once.
    sorted_keys = sorted(mapping)

    # Rank of every label among the sorted keys ("consecutivized" labels).
    consecutivized_labels = numpy.searchsorted(sorted_keys, original_labels)

    # index_array[rank] is the new label for the key with that rank.
    index_array = numpy.array([mapping[key] for key in sorted_keys])
    return index_array[consecutivized_labels]
def using_frompyfunc():
    """Relabel ``original_labels`` with a ufunc built from ``mapping.__getitem__``.

    The dict's bound ``__getitem__`` is handed to ``numpy.frompyfunc``
    directly, so each element costs one dict lookup with no extra Python
    frame.  Per the NumPy documentation, ufuncs created by ``frompyfunc``
    return object-dtype arrays.  A label missing from ``mapping`` raises
    ``KeyError``.
    """
    lookup = mapping.__getitem__
    relabel = numpy.frompyfunc(lookup, 1, 1)  # one input, one output
    return relabel(original_labels)
def using_frompyfunc_with_lambda():
    """Relabel ``original_labels`` with a ufunc built from a lambda.

    Same approach as ``using_frompyfunc``, except that the dict lookup is
    wrapped in a lambda, which adds one Python-level call per element.
    A label missing from ``mapping`` raises ``KeyError``.
    """
    relabel = numpy.frompyfunc(lambda label: mapping[label], 1, 1)
    return relabel(original_labels)
def using_vectorize():
    """Relabel ``original_labels`` with ``numpy.vectorize`` over ``mapping.__getitem__``.

    The dict's bound ``__getitem__`` is vectorized directly, without a
    wrapping lambda.  A label missing from ``mapping`` raises ``KeyError``.
    """
    lookup = mapping.__getitem__
    relabel = numpy.vectorize(lookup)
    return relabel(original_labels)
def using_vectorize_with_lambda():
    """Relabel ``original_labels`` with ``numpy.vectorize`` over a lambda.

    Same approach as ``using_vectorize``, except that the dict lookup is
    wrapped in a lambda, which adds one Python-level call per element.
    A label missing from ``mapping`` raises ``KeyError``.
    """
    relabel = numpy.vectorize(lambda label: mapping[label])
    return relabel(original_labels)
def using_plain_forloop():
    """Relabel ``original_labels`` one element at a time in a Python loop.

    This is the baseline implementation, and it turns out to be ridiculously
    slow compared with the others.

    Returns:
        A new array with the same shape and dtype as ``original_labels``
        in which every element has been replaced by ``mapping[element]``.

    Raises:
        KeyError: if a label is not a key of ``mapping``.
    """
    result = numpy.empty(original_labels.shape, dtype=original_labels.dtype)
    result_flat = result.flat
    # enumerate() over the flat iterator replaces the Python 2-only
    # xrange(len(...)) index loop and runs unchanged on Python 2 and 3.
    for i, label in enumerate(original_labels.flat):
        result_flat[i] = mapping[label]
    return result
import timeit


def _random_labels(num_labels, shape):
    """Return a uint32 image of ``shape`` with random labels in [0, num_labels)."""
    return (num_labels * numpy.random.random(shape)).astype(numpy.uint32)


def _offset_mapping(num_entries):
    """Return a dict that maps every k in range(num_entries) to k + 99."""
    return {k: k + 99 for k in range(num_entries)}


def _report_timings(functions):
    """Time one call of each function and print '<name> <seconds>' per line."""
    for function in functions:
        # Passing the callable itself (rather than a statement string with a
        # "from __main__ import ..." setup) keeps this working no matter how
        # the file is launched.
        elapsed = timeit.timeit(function, number=1)
        print("%s %s" % (function.__name__, elapsed))


# Quick consistency check on a small image...
original_labels = _random_labels(100, (100, 100))
mapping = _offset_mapping(100)
expected = original_labels + 99
for _relabel in (using_index_array,
                 using_vectorize_with_lambda,
                 using_frompyfunc_with_lambda,
                 using_frompyfunc,
                 using_vectorize,
                 using_plain_forloop):
    assert (expected == _relabel()).all(), _relabel.__name__

# using_plain_forloop is deliberately left out of the timings below: it is
# far too slow to run on a 10000 x 10000 image.
_ALL_VECTORIZED = (using_vectorize_with_lambda,
                   using_frompyfunc_with_lambda,
                   using_vectorize,
                   using_frompyfunc,
                   using_index_array)
_FASTEST_TWO = (using_index_array, using_frompyfunc)

original_labels = _random_labels(100, (10000, 10000))
mapping = _offset_mapping(100)
print("With 100 labels:\n")
_report_timings(_ALL_VECTORIZED)
print("")

original_labels = _random_labels(10000, (10000, 10000))
mapping = _offset_mapping(10000)
print("With 10000 labels:\n")
_report_timings(_ALL_VECTORIZED)

# The mapping may hold far more entries than there are labels in the image;
# these two rounds measure how the two fastest approaches cope with that.
original_labels = _random_labels(30000, (10000, 10000))
mapping = _offset_mapping(1000000)
print("With 30,000 labels using 1M entry map:\n")
_report_timings(_FASTEST_TWO)
print("")

original_labels = _random_labels(30000, (10000, 10000))
mapping = _offset_mapping(10000000)
print("With 30,000 labels using 10M entry map:\n")
_report_timings(_FASTEST_TWO)
print("")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example results on my MacBook Pro:
$ python relabel_benchmarks.py
With 100 labels:
using_vectorize_with_lambda 30.7045938969
using_frompyfunc_with_lambda 22.6237640381
using_vectorize 20.1000521183
using_frompyfunc 13.0213320255
using_index_array 6.54346990585
With 10000 labels:
using_vectorize_with_lambda 33.634442091
using_frompyfunc_with_lambda 24.5189290047
using_vectorize 21.9436271191
using_frompyfunc 14.8590140343
using_index_array 11.5993220806
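Observations:
- The index-array approach based on numpy.searchsorted() is the fastest in both runs.
- Among the per-element approaches, numpy.frompyfunc is faster than numpy.vectorize, and in both cases it is faster to pass dict.__getitem__ directly than to wrap the lookup in a lambda.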