Last active
May 1, 2017 19:45
-
-
Save kkweon/5f69d564b6258f6358c7f1529470b55b to your computer and use it in GitHub Desktop.
counter vs dict
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cpdef dict cython_count(list list_):
    """Count how many times each string occurs in ``list_``.

    Each element is assigned to a variable declared ``unicode``, so the
    list is expected to hold text strings.

    Returns a plain ``dict`` mapping each distinct string to its count.
    """
    cdef dict result = {}
    # Py_ssize_t is the C type of a list length; a plain C ``int`` can be
    # too narrow for very large lists.
    cdef Py_ssize_t i
    cdef Py_ssize_t end_index = len(list_)
    cdef unicode word

    for i in range(end_index):
        word = list_[i]
        if word in result:
            result[word] += 1
        else:
            result[word] = 1
    return result
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
from collections import defaultdict | |
from timeit import timeit | |
from cython_counter import cython_count | |
import cProfile | |
import requests | |
# Build the benchmark corpus: download four pages, concatenate their text in
# order, and split the result on single spaces into a list of tokens.
print("LOADING TEST_STRING")
TEST_STRING = "".join(
    requests.get(page_url).text
    for page_url in (
        "http://www.onemilescroll.com/",
        "http://www.facebook.com",
        "http://www.naver.com",
        "https://jsonplaceholder.typicode.com/photos",
    )
).split(" ")
def counter(list_):
    """Tally the items of ``list_`` with ``collections.Counter``.

    Returns a ``Counter`` mapping each distinct item to the number of
    times it appears.
    """
    frequencies = Counter(list_)
    return frequencies
def manual_dict(list_):
    """Tally the items of ``list_`` in a plain dict.

    Uses an explicit membership test per item: the first sighting stores 1,
    every later sighting increments the stored count.

    Returns a ``dict`` mapping each distinct item to its count.
    """
    tally = {}
    for token in list_:
        if token not in tally:
            # First time this token is seen.
            tally[token] = 1
            continue
        tally[token] += 1
    return tally
def default_dict(list_):
    """Tally the items of ``list_`` with a ``defaultdict(int)``.

    Missing keys start at 0, so each item is simply incremented.

    Returns the ``defaultdict`` mapping each distinct item to its count.
    """
    tally = defaultdict(int)
    for token in list_:
        tally[token] += 1
    return tally
def run_profiler(TEST_STMT):
    """Echo ``TEST_STMT`` and print its ``cProfile`` report.

    ``TEST_STMT`` is a string of Python source handed to ``cProfile.run``.
    A blank line is printed after the report to separate consecutive runs.
    """
    print(TEST_STMT)
    cProfile.run(TEST_STMT)
    print("")
def run_timeit(TEST_STMT, number=1000, globals=None):
    """Time ``TEST_STMT`` with ``timeit`` and print the total elapsed time.

    Args:
        TEST_STMT: Source of a call expression such as ``"f(TEST_STRING)"``.
            The text before the first ``(`` is taken as the callable's name
            and imported from ``__main__`` in the timeit setup.
        number: How many times the statement is executed.
        globals: Namespace the statement runs in. When omitted, a fresh
            ``{"TEST_STRING": TEST_STRING}`` dict is built for this call so
            no dict is shared between calls.

    Prints one line: the statement left-aligned in a 30-column field, the
    run count, and the elapsed time returned by ``timeit``.
    """
    if globals is None:
        globals = {"TEST_STRING": TEST_STRING}
    func_name = TEST_STMT.split("(")[0]
    elapsed = timeit(
        TEST_STMT,
        setup="from __main__ import {}".format(func_name),
        globals=globals,
        number=number,
    )
    # Report the actual run count instead of a hard-coded 1000.
    print("{:<30} ({} runs): {:>}".format(TEST_STMT, number, elapsed))
if __name__ == '__main__':
    # Sanity check: every implementation must produce the same counts as
    # Counter before any timing is reported.
    reference = counter(TEST_STRING)
    assert reference == manual_dict(TEST_STRING)
    assert reference == default_dict(TEST_STRING)
    assert reference == cython_count(TEST_STRING)

    print("WORDS: ", len(reference))

    TEST_1 = "counter(TEST_STRING)"
    TEST_2 = "manual_dict(TEST_STRING)"
    TEST_3 = "default_dict(TEST_STRING)"
    TEST_4 = "cython_count(TEST_STRING)"
    TEST_LIST = [TEST_1, TEST_2, TEST_3, TEST_4]

    # Run all timings first, then all profiles, so the output stays grouped.
    for test in TEST_LIST:
        run_timeit(test)
    for test in TEST_LIST:
        run_profiler(test)
""" | |
LOADING TEST_STRING | |
WORDS: 22900 | |
[Timeit] | |
counter(TEST_STRING) (1000 runs): 11.974506849001045 | |
manual_dict(TEST_STRING) (1000 runs): 21.34182666000561 | |
default_dict(TEST_STRING) (1000 runs): 19.588844418998633 | |
cython_count(TEST_STRING) (1000 runs): 13.365923473000294 | |
[Profiler] | |
counter(TEST_STRING) | |
13 function calls in 0.012 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.012 0.012 <string>:1(<module>) | |
1 0.000 0.000 0.012 0.012 __init__.py:516(__init__) | |
1 0.000 0.000 0.012 0.012 __init__.py:585(update) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:70(__contains__) | |
1 0.000 0.000 0.000 0.000 abc.py:178(__instancecheck__) | |
1 0.000 0.000 0.012 0.012 main.py:17(counter) | |
1 0.012 0.012 0.012 0.012 {built-in method _collections._count_elements} | |
1 0.000 0.000 0.012 0.012 {built-in method builtins.exec} | |
1 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance} | |
2 0.000 0.000 0.000 0.000 {built-in method builtins.len} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
manual_dict(TEST_STRING) | |
4 function calls in 0.022 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.022 0.022 <string>:1(<module>) | |
1 0.021 0.021 0.021 0.021 main.py:21(manual_dict) | |
1 0.000 0.000 0.022 0.022 {built-in method builtins.exec} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
default_dict(TEST_STRING) | |
4 function calls in 0.020 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.020 0.020 <string>:1(<module>) | |
1 0.019 0.019 0.019 0.019 main.py:31(default_dict) | |
1 0.000 0.000 0.020 0.020 {built-in method builtins.exec} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
cython_count(TEST_STRING) | |
4 function calls in 0.013 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.013 0.013 <string>:1(<module>) | |
1 0.000 0.000 0.013 0.013 {built-in method builtins.exec} | |
1 0.013 0.013 0.013 0.013 {built-in method cython_counter.cython_count} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment