Skip to content

Instantly share code, notes, and snippets.

@kkweon
Last active May 1, 2017 19:45
Show Gist options
  • Save kkweon/5f69d564b6258f6358c7f1529470b55b to your computer and use it in GitHub Desktop.
Save kkweon/5f69d564b6258f6358c7f1529470b55b to your computer and use it in GitHub Desktop.
counter vs dict
cpdef dict cython_count(list list_):
    """Count how many times each word occurs in ``list_``.

    Args:
        list_: list of words. Each element is assigned to a
            ``unicode``-typed variable, so elements are expected to be
            unicode strings.

    Returns:
        A plain ``dict`` mapping each distinct word to its occurrence count.
    """
    cdef dict result = {}
    cdef unicode word
    # Py_ssize_t is the native type for Python container sizes and indices;
    # a C ``int`` could overflow for very large lists on 64-bit builds.
    cdef Py_ssize_t i
    cdef Py_ssize_t end_index = len(list_)

    for i in range(end_index):
        word = list_[i]
        if word in result:
            result[word] += 1
        else:
            result[word] = 1
    return result
from collections import Counter
from collections import defaultdict
from timeit import timeit
from cython_counter import cython_count
import cProfile
import requests
# Build the benchmark corpus: download four pages, concatenate their text in
# request order, and split the result on single spaces into a list of tokens.
print("LOADING TEST_STRING")
TEST_STRING = "".join(
    requests.get(url).text
    for url in (
        "http://www.onemilescroll.com/",
        "http://www.facebook.com",
        "http://www.naver.com",
        "https://jsonplaceholder.typicode.com/photos",
    )
).split(" ")
def counter(list_):
    """Count occurrences of each item in ``list_`` with ``collections.Counter``.

    Args:
        list_: iterable of hashable items.

    Returns:
        The ``Counter`` built directly from ``list_``.
    """
    tally = Counter(list_)
    return tally
def manual_dict(list_):
    """Count occurrences of each item in ``list_`` using a plain ``dict``.

    Uses an explicit membership test per item, which is the approach this
    benchmark compares against ``Counter`` and ``defaultdict``.

    Args:
        list_: iterable of hashable items.

    Returns:
        A ``dict`` mapping each distinct item to its occurrence count.
    """
    counts = {}
    for token in list_:
        if token not in counts:
            # First sighting: start the count at one.
            counts[token] = 1
            continue
        counts[token] += 1
    return counts
def default_dict(list_):
    """Count occurrences of each item in ``list_`` using ``defaultdict(int)``.

    Args:
        list_: iterable of hashable items.

    Returns:
        The ``defaultdict`` mapping each distinct item to its occurrence count.
    """
    tally = defaultdict(int)
    for item in list_:
        # A missing key is created with int() == 0 before the increment.
        tally[item] += 1
    return tally
def run_profiler(TEST_STMT):
    """Print ``TEST_STMT``, profile it with ``cProfile.run``, then print a blank line.

    Args:
        TEST_STMT: source string of the statement to profile; it is handed
            unchanged to ``cProfile.run``.
    """
    print(TEST_STMT)  # label the report with the statement being profiled
    cProfile.run(statement=TEST_STMT)
    print()  # blank separator between consecutive reports
def run_timeit(TEST_STMT, number=1000, globals=None):
    """Time ``TEST_STMT`` with ``timeit`` and print the total elapsed time.

    Args:
        TEST_STMT: source string of a call such as ``"counter(TEST_STRING)"``.
            The text before the first ``"("`` is taken as the function name
            and imported from ``__main__`` in the timeit setup.
        number: how many times the statement is executed.
        globals: namespace the statement runs in. When omitted, a fresh
            ``{"TEST_STRING": TEST_STRING}`` is built for the call.
    """
    if globals is None:
        # Built per call: a dict default would be a single object shared by
        # every call, and timeit executes code inside the namespace it gets.
        globals = {"TEST_STRING": TEST_STRING}
    func_name = TEST_STMT.split("(")[0]
    elapsed = timeit(TEST_STMT,
                     setup="from __main__ import {}".format(func_name),
                     globals=globals,
                     number=number)
    # Report the real repetition count instead of a hard-coded "1000".
    print("{:<30} ({} runs): {:>}".format(TEST_STMT, number, elapsed))
if __name__ == '__main__':
    # Sanity check: every implementation must agree with Counter before its
    # timing is worth comparing. The second check previously repeated
    # manual_dict, so default_dict was never verified.
    expected = counter(TEST_STRING)
    assert expected == manual_dict(TEST_STRING)
    assert expected == default_dict(TEST_STRING)
    assert expected == cython_count(TEST_STRING)
    print("WORDS: ", len(expected))

    TEST_1 = "counter(TEST_STRING)"
    TEST_2 = "manual_dict(TEST_STRING)"
    TEST_3 = "default_dict(TEST_STRING)"
    TEST_4 = "cython_count(TEST_STRING)"
    TEST_LIST = [TEST_1, TEST_2, TEST_3, TEST_4]

    # Plain loops: these calls are run only for their printed output.
    for test in TEST_LIST:
        run_timeit(test)
    for test in TEST_LIST:
        run_profiler(test)
"""
LOADING TEST_STRING
WORDS: 22900
[Timeit]
counter(TEST_STRING) (1000 runs): 11.974506849001045
manual_dict(TEST_STRING) (1000 runs): 21.34182666000561
default_dict(TEST_STRING) (1000 runs): 19.588844418998633
cython_count(TEST_STRING) (1000 runs): 13.365923473000294
[Profiler]
counter(TEST_STRING)
13 function calls in 0.012 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.012 0.012 <string>:1(<module>)
1 0.000 0.000 0.012 0.012 __init__.py:516(__init__)
1 0.000 0.000 0.012 0.012 __init__.py:585(update)
2 0.000 0.000 0.000 0.000 _weakrefset.py:70(__contains__)
1 0.000 0.000 0.000 0.000 abc.py:178(__instancecheck__)
1 0.000 0.000 0.012 0.012 main.py:17(counter)
1 0.012 0.012 0.012 0.012 {built-in method _collections._count_elements}
1 0.000 0.000 0.012 0.012 {built-in method builtins.exec}
1 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}
2 0.000 0.000 0.000 0.000 {built-in method builtins.len}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
manual_dict(TEST_STRING)
4 function calls in 0.022 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.022 0.022 <string>:1(<module>)
1 0.021 0.021 0.021 0.021 main.py:21(manual_dict)
1 0.000 0.000 0.022 0.022 {built-in method builtins.exec}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
default_dict(TEST_STRING)
4 function calls in 0.020 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.020 0.020 <string>:1(<module>)
1 0.019 0.019 0.019 0.019 main.py:31(default_dict)
1 0.000 0.000 0.020 0.020 {built-in method builtins.exec}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
cython_count(TEST_STRING)
4 function calls in 0.013 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.013 0.013 <string>:1(<module>)
1 0.000 0.000 0.013 0.013 {built-in method builtins.exec}
1 0.013 0.013 0.013 0.013 {built-in method cython_counter.cython_count}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment