Skip to content

Instantly share code, notes, and snippets.

@sapamja
Last active August 29, 2015 13:57
Show Gist options
  • Save sapamja/9855018 to your computer and use it in GitHub Desktop.
Save sapamja/9855018 to your computer and use it in GitHub Desktop.
Profiling various ways of counting repeated items in a Python list
#!/usr/bin/python
import cProfile
from faker import Faker
from timeit import Timer
from collections import Counter
from collections import defaultdict
# old style using dictionary:
def get_count1(sequence):
    """Count item occurrences with a plain dict and an explicit membership test.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A dict mapping each distinct item to the number of times it occurs.
    """
    counts = {}
    for x in sequence:
        # First sighting creates the entry; later sightings increment it.
        if x in counts:
            counts[x] += 1
        else:
            counts[x] = 1
    return counts
def get_count2(sequence):
    """Count item occurrences using ``collections.defaultdict(int)``.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A ``defaultdict(int)`` mapping each distinct item to the number of
        times it occurs.
    """
    # int() yields 0, so unseen keys start at zero without a membership test.
    counts = defaultdict(int)
    for x in sequence:
        counts[x] += 1
    return counts
def get_count3(sequence):
    """Count item occurrences using ``dict.get`` with a default of zero.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A dict mapping each distinct item to the number of times it occurs.
    """
    counts = {}
    for x in sequence:
        # get() supplies 0 for unseen items, so no separate branch is needed.
        counts[x] = counts.get(x, 0) + 1
    return counts
def get_count4(sequence):
    """Count item occurrences by delegating to ``collections.Counter``.

    Args:
        sequence: Iterable of hashable items.

    Returns:
        A ``Counter`` mapping each distinct item to the number of times it
        occurs.
    """
    return Counter(sequence)
def get_count5(sequence):
    """Count item occurrences by calling ``.count`` once per distinct item.

    Every ``.count`` call rescans the whole input, so the work grows with
    (number of distinct items) x (input length).

    Args:
        sequence: Iterable of hashable items. Lists and tuples are used as
            they are; any other iterable is copied into a list first.

    Returns:
        A dict mapping each distinct item to the number of times it occurs.
    """
    # The input is traversed by set() and then again by every .count() call,
    # so a one-shot iterator (or anything lacking .count) has to be
    # materialised before use.
    if not isinstance(sequence, (list, tuple)):
        sequence = list(sequence)
    distinct = set(sequence)
    counts = {}
    for i in distinct:
        counts[i] = sequence.count(i)
    return counts
# Benchmark driver: build one shared input, then profile every counting
# variant against it.
s = Faker()
# 1000 values produced by Faker.random_number(digits=3).
seq = [s.random_number(digits=3) for _ in range(1000)]
# Fixed alternative input; uncomment to replace the random data above.
#seq = [1,1,1,1,12,22,2,2,2,23,3,4,4,5,4]
func = [get_count1, get_count2, get_count3, get_count4, get_count5]
for f in func:
    # The lambda reads `f` when called; that is safe here because the timer
    # is run inside the same loop iteration that created it.
    t = Timer(lambda: f(seq))
    # Print the label on its own: cProfile writes its report to stdout and
    # returns None, so printing its return value would emit a stray "None".
    print(f.__name__)
    # runctx resolves `t` in this module's namespace, so the statement also
    # works when this file is not the __main__ module.
    cProfile.runctx('t.timeit(number=1000)', globals(), locals())
get_count1 2011 function calls in 0.211 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.211 0.211 <string>:1(<module>)
1000 0.207 0.000 0.207 0.000 counts_repeated.py:10(get_count1)
1000 0.000 0.000 0.208 0.000 counts_repeated.py:49(<lambda>)
1 0.000 0.000 0.000 0.000 timeit.py:143(setup)
1 0.000 0.000 0.211 0.211 timeit.py:178(timeit)
1 0.003 0.003 0.211 0.211 timeit.py:96(inner)
1 0.000 0.000 0.000 0.000 {gc.disable}
1 0.000 0.000 0.000 0.000 {gc.enable}
1 0.000 0.000 0.000 0.000 {gc.isenabled}
1 0.000 0.000 0.000 0.000 {globals}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
2 0.000 0.000 0.000 0.000 {time.time}
None
get_count2 2011 function calls in 0.329 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.329 0.329 <string>:1(<module>)
1000 0.325 0.000 0.325 0.000 counts_repeated.py:19(get_count2)
1000 0.000 0.000 0.326 0.000 counts_repeated.py:49(<lambda>)
1 0.000 0.000 0.000 0.000 timeit.py:143(setup)
1 0.000 0.000 0.329 0.329 timeit.py:178(timeit)
1 0.004 0.004 0.329 0.329 timeit.py:96(inner)
1 0.000 0.000 0.000 0.000 {gc.disable}
1 0.000 0.000 0.000 0.000 {gc.enable}
1 0.000 0.000 0.000 0.000 {gc.isenabled}
1 0.000 0.000 0.000 0.000 {globals}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
2 0.000 0.000 0.000 0.000 {time.time}
None
get_count3 1002011 function calls in 0.485 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.485 0.485 <string>:1(<module>)
1000 0.372 0.000 0.481 0.000 counts_repeated.py:25(get_count3)
1000 0.000 0.000 0.481 0.000 counts_repeated.py:49(<lambda>)
1 0.000 0.000 0.000 0.000 timeit.py:143(setup)
1 0.000 0.000 0.485 0.485 timeit.py:178(timeit)
1 0.004 0.004 0.485 0.485 timeit.py:96(inner)
1 0.000 0.000 0.000 0.000 {gc.disable}
1 0.000 0.000 0.000 0.000 {gc.enable}
1 0.000 0.000 0.000 0.000 {gc.isenabled}
1 0.000 0.000 0.000 0.000 {globals}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1000000 0.109 0.000 0.109 0.000 {method 'get' of 'dict' objects}
2 0.000 0.000 0.000 0.000 {time.time}
None
get_count4 1009045 function calls (1009042 primitive calls) in 0.609 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.609 0.609 <string>:1(<module>)
2 0.000 0.000 0.000 0.000 _abcoll.py:97(__subclasshook__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:16(__init__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:20(__enter__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:26(__exit__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:36(__init__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:52(_commit_removals)
4 0.000 0.000 0.000 0.000 _weakrefset.py:58(__iter__)
2001 0.001 0.000 0.001 0.000 _weakrefset.py:68(__contains__)
2 0.000 0.000 0.000 0.000 _weakrefset.py:81(add)
1000 0.002 0.000 0.004 0.000 abc.py:128(__instancecheck__)
2/1 0.000 0.000 0.000 0.000 abc.py:148(__subclasscheck__)
1000 0.002 0.000 0.603 0.001 collections.py:406(__init__)
1000 0.489 0.000 0.601 0.001 collections.py:469(update)
1000 0.001 0.000 0.604 0.001 counts_repeated.py:32(get_count4)
1000 0.000 0.000 0.605 0.001 counts_repeated.py:49(<lambda>)
1 0.000 0.000 0.000 0.000 timeit.py:143(setup)
1 0.000 0.000 0.609 0.609 timeit.py:178(timeit)
1 0.004 0.004 0.609 0.609 timeit.py:96(inner)
1 0.000 0.000 0.000 0.000 {gc.disable}
1 0.000 0.000 0.000 0.000 {gc.enable}
1 0.000 0.000 0.000 0.000 {gc.isenabled}
1002 0.000 0.000 0.000 0.000 {getattr}
1 0.000 0.000 0.000 0.000 {globals}
1000 0.001 0.000 0.004 0.000 {isinstance}
3/1 0.000 0.000 0.000 0.000 {issubclass}
2 0.000 0.000 0.000 0.000 {method '__subclasses__' of 'type' objects}
4 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1000000 0.108 0.000 0.108 0.000 {method 'get' of 'dict' objects}
2 0.000 0.000 0.000 0.000 {method 'remove' of 'set' objects}
2 0.000 0.000 0.000 0.000 {time.time}
None
get_count5 636011 function calls in 15.457 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 15.457 15.457 <string>:1(<module>)
1000 0.249 0.000 15.447 0.015 counts_repeated.py:35(get_count5)
1000 0.005 0.000 15.452 0.015 counts_repeated.py:49(<lambda>)
1 0.000 0.000 0.000 0.000 timeit.py:143(setup)
1 0.000 0.000 15.457 15.457 timeit.py:178(timeit)
1 0.005 0.005 15.457 15.457 timeit.py:96(inner)
1 0.000 0.000 0.000 0.000 {gc.disable}
1 0.000 0.000 0.000 0.000 {gc.enable}
1 0.000 0.000 0.000 0.000 {gc.isenabled}
1 0.000 0.000 0.000 0.000 {globals}
634000 15.198 0.000 15.198 0.000 {method 'count' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
2 0.000 0.000 0.000 0.000 {time.time}
None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment