-
-
Save javierarilos/5600253 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
u""" | |
Created on May 15, 2013 | |
@author: pablito56 | |
@license: MIT | |
@contact: [email protected] | |
Performance comparisson of different options to check existence of forbidden | |
symbols in incoming strings (standard 'char in string' vs. RegExp). | |
It checks 1, 3, 5 or 8 forbidden characters existence in 1k, 10k or 100k words | |
of 15, 20 or 30 random characters length. | |
It tests worst case; forbidden chars are never found. | |
Results at the end of the file | |
""" | |
from string import ascii_letters, punctuation, digits | |
from random import seed, choice | |
import timeit | |
import re | |
# We check worse case; symbols will never be found | |
chars = ascii_letters + digits # + punctuation | |
seed() | |
forbidden_8 = (":", "/", "?", "#", "~", "=", "&", "!") | |
forbidden_1 = forbidden_8[0] | |
forbidden_1_re = re.compile(forbidden_1) | |
print "Initializing py4j serving: Finder.jForbidden. Prerrequisite: have a Jvm running with the Java code." | |
from py4j.java_gateway import JavaGateway | |
gateway = JavaGateway() | |
finder = gateway.entry_point.getFinder() | |
print "Initialized Finder.jForbidden as finder." | |
def check_forbidden(): | |
num_times = 1000 | |
symbols_amount = (3, 5, 8) | |
words_amount = (1000, 10000, 100000) | |
wrods_sizes = (15, 20, 30) | |
header = "\t| {:^10} | {:^10} | {:^10} | {:^10} |".format("# SYMBOLS", "TIME NO RE", "TIME RE", "RE SpeedUp") | |
log_trace = "\t| {:^10} | {:^10.5f} | {:^10.5f} | {:^10.5f} |" | |
for num_words in words_amount: | |
for size in wrods_sizes: | |
print "CHECKING {} WORDS OF {} CHARS LENGTH:".format(num_words, size) | |
words = ["".join([choice(chars) for i in xrange(size)]) for j in xrange(num_words)] | |
print header | |
def find_forbidden_1(): | |
return map(lambda w: finder.jForbidden(w, forbidden_1), words) | |
#return map(lambda w: forbidden_1 in w, words) | |
def find_forbidden_1_re(): | |
return map(lambda w: forbidden_1_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_1, number=num_times) | |
t_res = timeit.timeit(find_forbidden_1_re, number=num_times) | |
print log_trace.format(1, t_std, t_res, t_std / t_res) | |
for num_symbols in symbols_amount: | |
forbidden = forbidden_8[:num_symbols] | |
forbidden_re = re.compile("[" + "".join(forbidden) + "]") | |
def find_forbidden_N(): | |
return map(lambda w: finder.jForbidden(w, forbidden_1), words) | |
# return map(lambda w: any((f in w for f in forbidden)), words) | |
# return map(lambda w: any((f == c for f in forbidden for c in w)), words) # Several times slower | |
def find_forbidden_N_re(): | |
return map(lambda w: forbidden_re.search(w) is not None, words) | |
t_std = timeit.timeit(find_forbidden_N, number=num_times) | |
t_res = timeit.timeit(find_forbidden_N_re, number=num_times) | |
print log_trace.format(num_symbols, t_std, t_res, t_std / t_res) | |
if __name__ == '__main__': | |
check_forbidden() | |
u""" | |
Results on MacBook Pro 13" 2.4 Ghz Intel Core i5 8Gb | |
CHECKING 1000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.22396 | 0.51894 | 0.43157 | | |
| 3 | 1.23259 | 0.66840 | 1.84410 | | |
| 5 | 1.91099 | 0.69866 | 2.73521 | | |
| 8 | 2.21286 | 0.73737 | 3.00100 | | |
CHECKING 1000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.24142 | 0.56059 | 0.43064 | | |
| 3 | 1.35343 | 0.80293 | 1.68561 | | |
| 5 | 1.55931 | 0.69956 | 2.22900 | | |
| 8 | 2.27682 | 0.70398 | 3.23423 | | |
CHECKING 1000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 0.25355 | 0.56270 | 0.45060 | | |
| 3 | 1.21860 | 0.78617 | 1.55003 | | |
| 5 | 1.54455 | 0.78203 | 1.97506 | | |
| 8 | 2.04291 | 1.13801 | 1.79516 | | |
CHECKING 10000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.33030 | 5.25763 | 0.44322 | | |
| 3 | 12.56244 | 6.80868 | 1.84506 | | |
| 5 | 15.09999 | 6.68055 | 2.26029 | | |
| 8 | 19.07921 | 7.10648 | 2.68476 | | |
CHECKING 10000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.36260 | 5.80443 | 0.40703 | | |
| 3 | 12.29937 | 7.57868 | 1.62289 | | |
| 5 | 15.11851 | 7.56908 | 1.99740 | | |
| 8 | 19.59392 | 7.70403 | 2.54333 | | |
CHECKING 10000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 2.47049 | 5.85040 | 0.42228 | | |
| 3 | 12.75136 | 8.43134 | 1.51238 | | |
| 5 | 15.77369 | 8.77852 | 1.79685 | | |
| 8 | 20.65556 | 8.53470 | 2.42019 | | |
CHECKING 100000 WORDS OF 15 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 24.20128 | 55.73680 | 0.43421 | | |
| 3 | 122.62828 | 69.88992 | 1.75459 | | |
| 5 | 148.97942 | 68.43717 | 2.17688 | | |
| 8 | 188.52356 | 69.15813 | 2.72598 | | |
CHECKING 100000 WORDS OF 20 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 24.71635 | 55.20613 | 0.44771 | | |
| 3 | 122.12375 | 73.54637 | 1.66050 | | |
| 5 | 150.11232 | 72.87738 | 2.05979 | | |
| 8 | 191.79193 | 73.47913 | 2.61016 | | |
CHECKING 100000 WORDS OF 30 CHARS LENGTH: | |
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp | | |
| 1 | 25.36807 | 56.23131 | 0.45114 | | |
| 3 | 125.01627 | 82.79500 | 1.50995 | | |
| 5 | 155.50607 | 82.55599 | 1.88364 | | |
| 8 | 204.95154 | 84.17276 | 2.43489 | | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
echo Installing py4j... | |
pip install py4j | |
echo compiling java code, $VIRTUAL_ENV points to your virtualenv root... | |
javac -cp .:$VIRTUAL_ENV/share/py4j/py4j0.7.jar *.java | |
echo executing the java EntryPoint... | |
java -cp .:$VIRTUAL_ENV/share/py4j/py4j0.7.jar FindForbiddenEntryPoint & | |
echo waiting to py4j server | |
sleep 1 | |
echo executing the check_forbidden.py script... | |
python check_forbidden.py |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import py4j.GatewayServer; | |
public class FindForbiddenEntryPoint { | |
public class Finder{ | |
public boolean jForbidden(String word, char forbidden_1){ | |
return word.indexOf(forbidden_1) > -1; | |
} | |
} | |
Finder finder = null; | |
public FindForbiddenEntryPoint() { | |
finder = new Finder(); | |
} | |
public Finder getFinder() { | |
return finder; | |
} | |
public static void main(String[] args) { | |
GatewayServer gatewayServer = new GatewayServer(new FindForbiddenEntryPoint()); | |
gatewayServer.start(); | |
System.out.println("Gateway Server Started"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment