Created
May 17, 2013 09:41
-
-
Save pablito56/5598063 to your computer and use it in GitHub Desktop.
Performance comparisson of different options to check existence of forbidden symbols in incoming strings (standard 'char in string' vs. RegExp).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
u""" | |
Created on May 15, 2013 | |
@author: pablito56 | |
@license: MIT | |
@contact: [email protected] | |
Performance comparisson of different options to check existence of forbidden | |
symbols in incoming strings (standard 'char in string' vs. RegExp). | |
It checks 1, 3, 5 or 8 forbidden characters existence in 1k, 10k or 100k words | |
of 15, 20 or 30 random characters length. | |
It tests worst case; forbidden chars are never found. | |
Results at the end of the file | |
""" | |
from string import ascii_letters, punctuation, digits | |
from random import seed, choice | |
import timeit | |
import re | |
# We check worse case; symbols will never be found | |
chars = ascii_letters + digits # + punctuation | |
seed() | |
forbidden_8 = (":", "/", "?", "#", "~", "=", "&", "!") | |
forbidden_1 = forbidden_8[0] | |
forbidden_1_re = re.compile(forbidden_1) | |
def check_forbidden(): | |
num_times = 1000 | |
symbols_amount = (3, 5, 8) | |
words_amount = (1000, 10000, 100000) | |
wrods_sizes = (15, 20, 30) | |
header = "\t| {:^10} | {:^10} | {:^10} | {:^10} |".format("# SYMBOLS", "TIME NO RE", "TIME RE", "RE SpeedUp") | |
log_trace = "\t| {:^10} | {:^10.5f} | {:^10.5f} | {:^10.5f} |" | |
for num_words in words_amount: | |
for size in wrods_sizes: | |
print "CHECKING {} WORDS OF {} CHARS LENGTH:".format(num_words, size) | |
words = ["".join([choice(chars) for i in xrange(size)]) for j in xrange(num_words)] | |
print header | |
def find_forbidden_1(): | |
return map(lambda w: forbidden_1 in w, words) | |
def find_forbidden_1_re(): | |
return map(lambda w: forbidden_1_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_1, number=num_times) | |
t_res = timeit.timeit(find_forbidden_1_re, number=num_times) | |
print log_trace.format(1, t_std, t_res, t_std / t_res) | |
for num_symbols in symbols_amount: | |
forbidden = forbidden_8[:num_symbols] | |
forbidden_re = re.compile("[" + "".join(forbidden) + "]") | |
def find_forbidden_N(): | |
return map(lambda w: any((f in w for f in forbidden)), words) | |
# return map(lambda w: any((f == c for f in forbidden for c in w)), words) # Several times slower | |
def find_forbidden_N_re(): | |
return map(lambda w: forbidden_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_N, number=num_times) | |
t_res = timeit.timeit(find_forbidden_N_re, number=num_times) | |
print log_trace.format(num_symbols, t_std, t_res, t_std / t_res) | |
if __name__ == '__main__': | |
check_forbidden() | |
u""" | |
Results on MacBook Pro 13" 2.4 Ghz Intel Core i5 8Gb | |
CHECKING 1000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.22396 | 0.51894 | 0.43157 | | |
| 3 | 1.23259 | 0.66840 | 1.84410 | | |
| 5 | 1.91099 | 0.69866 | 2.73521 | | |
| 8 | 2.21286 | 0.73737 | 3.00100 | | |
CHECKING 1000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.24142 | 0.56059 | 0.43064 | | |
| 3 | 1.35343 | 0.80293 | 1.68561 | | |
| 5 | 1.55931 | 0.69956 | 2.22900 | | |
| 8 | 2.27682 | 0.70398 | 3.23423 | | |
CHECKING 1000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.25355 | 0.56270 | 0.45060 | | |
| 3 | 1.21860 | 0.78617 | 1.55003 | | |
| 5 | 1.54455 | 0.78203 | 1.97506 | | |
| 8 | 2.04291 | 1.13801 | 1.79516 | | |
CHECKING 10000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.33030 | 5.25763 | 0.44322 | | |
| 3 | 12.56244 | 6.80868 | 1.84506 | | |
| 5 | 15.09999 | 6.68055 | 2.26029 | | |
| 8 | 19.07921 | 7.10648 | 2.68476 | | |
CHECKING 10000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.36260 | 5.80443 | 0.40703 | | |
| 3 | 12.29937 | 7.57868 | 1.62289 | | |
| 5 | 15.11851 | 7.56908 | 1.99740 | | |
| 8 | 19.59392 | 7.70403 | 2.54333 | | |
CHECKING 10000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.47049 | 5.85040 | 0.42228 | | |
| 3 | 12.75136 | 8.43134 | 1.51238 | | |
| 5 | 15.77369 | 8.77852 | 1.79685 | | |
| 8 | 20.65556 | 8.53470 | 2.42019 | | |
CHECKING 100000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 24.20128 | 55.73680 | 0.43421 | | |
| 3 | 122.62828 | 69.88992 | 1.75459 | | |
| 5 | 148.97942 | 68.43717 | 2.17688 | | |
| 8 | 188.52356 | 69.15813 | 2.72598 | | |
CHECKING 100000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 24.71635 | 55.20613 | 0.44771 | | |
| 3 | 122.12375 | 73.54637 | 1.66050 | | |
| 5 | 150.11232 | 72.87738 | 2.05979 | | |
| 8 | 191.79193 | 73.47913 | 2.61016 | | |
CHECKING 100000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 25.36807 | 56.23131 | 0.45114 | | |
| 3 | 125.01627 | 82.79500 | 1.50995 | | |
| 5 | 155.50607 | 82.55599 | 1.88364 | | |
| 8 | 204.95154 | 84.17276 | 2.43489 | | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment