Last active
August 29, 2015 14:16
-
-
Save zed/62476156d065635cce0c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Measure relative performance of "count overlapping substrings" functions. | |
http://hashcode.ru/questions/404985/python-%D1%85%D0%B8%D1%82%D1%80%D0%BE%D0%B5-%D0%B2%D1%85%D0%BE%D0%B6%D0%B4%D0%B5%D0%BD%D0%B8%D0%B5-%D0%BF%D0%BE%D0%B4%D1%81%D1%82%D1%80%D0%BE%D0%BA%D0%B8-%D0%B2-%D1%81%D1%82%D1%80%D0%BE%D0%BA%D1%83 | |
""" | |
import re | |
def count_overlapping_substrings(haystack, needle): | |
count = 0 | |
i = -1 | |
while True: | |
i = haystack.find(needle, i+1) | |
if i == -1: | |
return count | |
count += 1 | |
def count_overlapping_substrings_re(string, substring): | |
substring_re = '(?=%s)' % re.escape(substring) | |
return len(re.findall(substring_re, string)) | |
# count_substring_*() are from https://gist.github.com/ReinRaus/8846dc49fc532d4e1b28 | |
def count_substrings_re( string, substring ): | |
substring_re = '(?=(%s))' % re.escape( substring ) | |
return len( re.findall( substring_re, string ) ) | |
def count_substrings_re_opt(string, substring): | |
if len( substring )==1: | |
substring_re = re.escape( substring ) | |
else: | |
substring_re = re.escape( substring[0] ) + "(?=" + re.escape( substring[1:] ) + ")" | |
return len( re.findall( substring_re, string ) ) | |
def count_substrings( s, f ): | |
ind = 1 | |
count = 0 | |
while ind != -1: | |
ind = s.find( f ) | |
if ind >= 0: | |
count += 1 | |
s = s[ind+1:] | |
return count | |
if __name__ == '__main__': | |
from reporttime import accept_funcs, get_functions_with_prefix, measure # https://gist.github.com/zed/5650097 | |
@accept_funcs | |
def test_funcs(funcs, args): | |
"""Check that all functions produce the same result.""" | |
expected = funcs[0][1](*args) | |
for name, f in funcs: | |
r = f(*args) | |
assert r == expected, (r, expected, name, args) | |
funcs = get_functions_with_prefix('count_') | |
test_string = "avavrewwevavavewrewrew " + "vavvavavavavrewwevavavewrewrew" * 8 + "vavvavav" | |
args = [test_string, 'vav'] | |
test_funcs(funcs, args) | |
measure(funcs, args, comment='test_string') | |
"""Results: | |
name time ratio comment | |
count_substrings_re_opt 17.3 usec 1.00 test_string | |
count_overlapping_substrings 19.2 usec 1.11 test_string | |
count_overlapping_substrings_re 19.5 usec 1.13 test_string | |
count_substrings_re 22.7 usec 1.31 test_string | |
count_substrings 23.2 usec 1.34 test_string | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment