Created
January 5, 2017 14:04
-
-
Save purpleP/e14a30c17db5ec9345d559fc071dec4e to your computer and use it in GitHub Desktop.
comparing different string search methods
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ----------------------------------------------------------------------------------- benchmark: 3 tests ----------------------------------------------------------------------------------- | |
| Name (time in us) Min Max Mean StdDev Median IQR Outliers(*) Rounds Iterations | |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | |
| test_search[re_compile] 452.1870 (1.0) 1,199.2530 (1.13) 471.1449 (1.0) 56.5338 (1.0) 455.0175 (1.0) 3.9840 (1.0) 120;321 1844 1 | |
| test_search[no_compile] 521.2920 (1.15) 1,059.7880 (1.0) 554.4658 (1.18) 92.9603 (1.64) 526.5430 (1.16) 5.2185 (1.31) 28;75 388 1 | |
| test_search[endswith] 1,601.1760 (3.54) 3,252.0090 (3.07) 1,691.4124 (3.59) 246.1582 (4.35) 1,604.0630 (3.53) 38.6730 (9.71) 39;102 520 1 | |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random as rnd | |
| from itertools import cycle, product | |
| import re | |
| import pytest | |
def search_re_no_compile(search_items, strings_to_search):
    """Suffix-match every string with an *uncompiled* regex pattern.

    One alternation pattern is built from ``search_items`` and handed to
    ``re.search`` as a plain string for each input, so every call goes
    through the ``re`` module's own pattern lookup rather than a
    precompiled pattern object.

    Args:
        search_items: Iterable of literal suffix strings to look for.
        strings_to_search: Iterable of strings to test.

    Returns:
        A tuple with one entry per input string: the match object when the
        string ends with one of the suffixes (group 1 holds the matched
        suffix), otherwise ``None``.
    """
    # Escape each item so regex metacharacters ('.', '+', '|', ...) are
    # matched literally instead of being interpreted as pattern syntax.
    alternatives = [re.escape(item) for item in search_items]
    if not alternatives:
        # An empty alternation would match the empty suffix of every
        # string; with nothing to look for, nothing can match.
        return tuple(None for _ in strings_to_search)
    # \Z anchors at the very end of the string.  '$' would additionally
    # match just before a trailing newline, which str.endswith does not.
    pattern = '(' + '|'.join(alternatives) + r')\Z'
    return tuple(re.search(pattern, s) for s in strings_to_search)
def search_re_compile(search_items, strings_to_search):
    """Suffix-match every string with a single precompiled regex.

    One alternation pattern is built from ``search_items``, compiled once,
    and the compiled object is reused for every input string.

    Args:
        search_items: Iterable of literal suffix strings to look for.
        strings_to_search: Iterable of strings to test.

    Returns:
        A tuple with one entry per input string: the match object when the
        string ends with one of the suffixes (group 1 holds the matched
        suffix), otherwise ``None``.
    """
    # Escape each item so regex metacharacters ('.', '+', '|', ...) are
    # matched literally instead of being interpreted as pattern syntax.
    alternatives = [re.escape(item) for item in search_items]
    if not alternatives:
        # An empty alternation would match the empty suffix of every
        # string; with nothing to look for, nothing can match.
        return tuple(None for _ in strings_to_search)
    # \Z anchors at the very end of the string.  '$' would additionally
    # match just before a trailing newline, which str.endswith does not.
    compiled = re.compile('(' + '|'.join(alternatives) + r')\Z')
    return tuple(compiled.search(s) for s in strings_to_search)
def search_simple(search_items, strings_to_search):
    """Suffix-match every string using ``str.endswith`` (no regex).

    Args:
        search_items: Iterable of literal suffix strings to look for.
        strings_to_search: Iterable of strings to test.

    Returns:
        A tuple of booleans, one per input string: ``True`` when the string
        ends with at least one of the suffixes, ``False`` otherwise.
    """
    # Materialise the suffixes once: str.endswith accepts a tuple of
    # candidates, and a one-shot iterator passed as ``search_items`` would
    # otherwise be exhausted after the first string is checked.
    suffixes = tuple(search_items)
    return tuple(s.endswith(suffixes) for s in strings_to_search)
# The 100 four-digit suffixes "1000" .. "1099" that every strategy looks for.
search_items = tuple(str(1000 + i) for i in range(100))

# Filler text: the decimal digits of a random integer, repeated 100 times.
random_data = tuple(str(rnd.randint(0, 100000)) * 100 for _ in range(10000))

# Benchmark inputs, alternating "should match" / "should not match".
# The search item is appended AFTER the filler so that a should-match string
# really ends with it; a should-not-match string is the filler on its own
# (which may still end with a search item by coincidence, since the filler
# is random digits).
# zip() stops at its shortest input, so exactly len(search_items) strings are
# built and only the first len(search_items) entries of random_data are used.
strings_to_search = tuple(
    rd + si if should_match else rd
    for si, rd, should_match in
    zip(search_items, random_data, cycle((True, False)))
)
@pytest.mark.parametrize(
    'search_func',
    [
        pytest.param(search_re_no_compile, id='no_compile'),
        pytest.param(search_re_compile, id='re_compile'),
        pytest.param(search_simple, id='endswith'),
    ],
)
def test_search(benchmark, search_func):
    """Benchmark one suffix-search strategy on the shared module-level data.

    Parametrized once per strategy; the ``benchmark`` fixture calls
    ``search_func`` with ``search_items`` and ``strings_to_search``.
    """
    benchmark(search_func, search_items, strings_to_search)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment