-
-
Save drslump/5599280 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
u""" | |
Created on May 15, 2013 | |
@author: pablito56 | |
@license: MIT | |
@contact: [email protected] | |
Performance comparisson of different options to check existence of forbidden | |
symbols in incoming strings (standard 'char in string' vs. RegExp). | |
It checks 1, 3, 5 or 8 forbidden characters existence in 1k, 10k or 100k words | |
of 15, 20 or 30 random characters length. | |
It tests worst case; forbidden chars are never found. | |
Results at the end of the file | |
""" | |
from string import ascii_letters, punctuation, digits | |
from random import seed, choice | |
import timeit | |
import re | |
from forbidden import forbidden as c_forbidden, forbidden_list as c_forbidden_list | |
# We check worse case; symbols will never be found | |
chars = ascii_letters + digits # + punctuation | |
seed() | |
forbidden_8 = (":", "/", "?", "#", "~", "=", "&", "!") | |
forbidden_1 = forbidden_8[0] | |
forbidden_1_re = re.compile(forbidden_1) | |
def check_forbidden(): | |
num_times = 1000 | |
symbols_amount = (3, 5, 8) | |
words_amount = (1000, 10000, 100000) | |
wrods_sizes = (15, 20, 30) | |
header = "\t| {:^10} | {:^10} | {:^10} | {:^10} |".format("# SYMBOLS", "TIME NO RE", "TIME RE", "RE SpeedUp") | |
log_trace = "\t| {:^10} | {:^10.5f} | {:^10.5f} | {:^10.5f} |" | |
for num_words in words_amount: | |
for size in wrods_sizes: | |
print "CHECKING {} WORDS OF {} CHARS LENGTH:".format(num_words, size) | |
words = ["".join([choice(chars) for i in xrange(size)]) for j in xrange(num_words)] | |
print header | |
def find_forbidden_1(): | |
return c_forbidden_list(words, forbidden_1) | |
#return map(lambda w: c_forbidden(w, forbidden_1), words) | |
#return map(lambda w: forbidden_1 in w, words) | |
def find_forbidden_1_re(): | |
return map(lambda w: forbidden_1_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_1, number=num_times) | |
t_res = timeit.timeit(find_forbidden_1_re, number=num_times) | |
print log_trace.format(1, t_std, t_res, t_std / t_res) | |
for num_symbols in symbols_amount: | |
forbidden = ''.join(forbidden_8[:num_symbols]) | |
forbidden_re = re.compile("[" + "".join(forbidden) + "]") | |
def find_forbidden_N(): | |
return c_forbidden_list(words, forbidden) | |
#return map(lambda w: c_forbidden(w, forbidden), words) | |
# return map(lambda w: any((f in w for f in forbidden)), words) | |
def find_forbidden_N_re(): | |
return map(lambda w: forbidden_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_N, number=num_times) | |
t_res = timeit.timeit(find_forbidden_N_re, number=num_times) | |
print log_trace.format(num_symbols, t_std, t_res, t_std / t_res) | |
if __name__ == '__main__': | |
check_forbidden() | |
u""" | |
Results on MacBook Pro 15" 2 Ghz Intel Core i7 4Gb | |
CHECKING 1000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.39066 | 0.44744 | 0.87311 | | |
| 3 | 0.37141 | 0.58636 | 0.63341 | | |
| 5 | 0.37210 | 0.57790 | 0.64388 | | |
| 8 | 0.37863 | 0.58247 | 0.65005 | | |
CHECKING 1000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.39521 | 0.45214 | 0.87408 | | |
| 3 | 0.37573 | 0.62202 | 0.60404 | | |
| 5 | 0.37896 | 0.62064 | 0.61059 | | |
| 8 | 0.38474 | 0.62324 | 0.61733 | | |
CHECKING 1000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.40622 | 0.46979 | 0.86468 | | |
| 3 | 0.38748 | 0.70775 | 0.54748 | | |
| 5 | 0.39146 | 0.70474 | 0.55547 | | |
| 8 | 0.39701 | 0.70598 | 0.56236 | | |
CHECKING 10000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 3.83916 | 4.50477 | 0.85224 | | |
| 3 | 3.66388 | 5.82184 | 0.62933 | | |
| 5 | 3.69978 | 5.79355 | 0.63860 | | |
| 8 | 3.77003 | 5.80201 | 0.64978 | | |
CHECKING 10000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 3.94811 | 4.56033 | 0.86575 | | |
| 3 | 3.73949 | 6.53689 | 0.57206 | | |
| 5 | 3.88055 | 6.35791 | 0.61035 | | |
| 8 | 4.01171 | 6.26753 | 0.64008 | | |
CHECKING 10000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 4.05587 | 4.68777 | 0.86520 | | |
| 3 | 3.92409 | 7.26555 | 0.54010 | | |
| 5 | 3.95718 | 7.24153 | 0.54646 | | |
| 8 | 4.08453 | 7.07470 | 0.57734 | | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Python.h> | |
#include <string.h> | |
#include <stddef.h> | |
static PyObject * | |
forbidden(PyObject *self, PyObject *args) | |
{ | |
const char *word; | |
const char *chars; | |
if (!PyArg_ParseTuple(args, "ss", &word, &chars)) | |
return NULL; | |
if (strlen(word) != strcspn(word, chars)) { | |
Py_RETURN_TRUE; | |
} | |
Py_RETURN_FALSE; | |
} | |
static PyObject * | |
forbidden_list(PyObject *self, PyObject *args) | |
{ | |
const char *chars; | |
PyObject *words; | |
PyObject *list; | |
PyObject *tmp; | |
long int idx; | |
long int len; | |
const char *word; | |
if (!PyArg_ParseTuple(args, "Os", &words, &chars)) | |
return NULL; | |
len = PyList_Size(words); | |
list = PyList_New(0); | |
for (idx=0; idx<len; idx++) { | |
tmp = PyList_GetItem(words, idx); | |
Py_INCREF(tmp); | |
word = PyString_AsString(tmp); | |
if (strlen(word) != strcspn(word, chars)) { | |
PyList_Append(list, Py_True); | |
Py_INCREF(Py_True); | |
} else { | |
PyList_Append(list, Py_False); | |
Py_INCREF(Py_False); | |
} | |
Py_DECREF(tmp); | |
} | |
return list; | |
} | |
static char forbidden_docs[] = | |
"forbidden(word, chars): Looks for forbidden chars\n"; | |
static char forbidden_list_docs[] = | |
"forbidden(words, chars): Looks for forbidden chars in the list of words\n"; | |
static PyMethodDef forbidden_funcs[] = { | |
{"forbidden", (PyCFunction)forbidden, METH_VARARGS, forbidden_docs}, | |
{"forbidden_list", (PyCFunction)forbidden_list, METH_VARARGS, forbidden_list_docs}, | |
{NULL, NULL, 0, NULL} | |
}; | |
void initforbidden(void) | |
{ | |
Py_InitModule3("forbidden", forbidden_funcs, "Finds forbidden chars"); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from distutils.core import setup, Extension | |
setup(name='forbidden', version='1.0', ext_modules=[Extension('forbidden', ['forbidden.c'])]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Messi!! 💃