Created
September 19, 2016 14:44
-
-
Save scottyallen/f14fc17c913dcc9707e5a6123bd00ad9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import sys | |
import gflags | |
import sqlsoup | |
# Command-line flag: optional cap on the number of rows main() fetches.
gflags.DEFINE_integer('limit', None, 'Max number of records to look at.')
FLAGS = gflags.FLAGS

# ANSI escape sequences wrapped around each highlighted character.
RED = '\033[91m'
END = '\033[0m'

# Characters that are treated as neutral: they are neither highlighted by
# colorize() nor counted as non-Cyrillic by the detection helpers.
ignored = set(
    u' "-;\xA0,'    # space, double quote, hyphen, semicolon, NBSP, comma
    u'\xab\xbb'     # left/right guillemets
    u'0123456789'   # ASCII digits
    u'().\u2116/'   # parentheses, full stop, numero sign, slash
    u'\n\r'         # line breaks
    u':?\\%*=+'     # colon, question mark, backslash, percent, star, =, +
)
def colorize(input):
    """Return *input* UTF-8 encoded with unexpected characters in red.

    Every character that is neither Cyrillic (per ``is_cyrillic``) nor a
    member of ``ignored`` is individually wrapped in ``RED`` ... ``END``;
    all other characters are emitted as-is.  The pieces are joined with a
    plain ``''``, so this relies on ``encode`` returning ``str`` (Python 2).
    """
    def render(ch):
        # Neutral or Cyrillic characters pass through untouched.
        if is_cyrillic(ch) or ch in ignored:
            return ch.encode('utf8')
        return RED + ch.encode('utf8') + END

    return ''.join([render(ch) for ch in input])
def has_non_cyrillic(input):
    """Return True if *input* has a character that is neither Cyrillic nor ignored.

    Scanning stops at the first such character; an empty *input* yields False.
    """
    return any(not (is_cyrillic(ch) or ch in ignored) for ch in input)
def is_cyrillic(c):
    """Return True when the code point of *c* is in U+0400..U+04FF inclusive."""
    return 0x0400 <= ord(c) < 0x0500
def is_interesting_char(c):
    """Return True when *c* is neither Cyrillic nor a member of ``ignored``."""
    return not (is_cyrillic(c) or c in ignored)
def biterate(input):
    """Map each character of *input* to a one-character class marker.

    The result has the same length as *input*:
      '0' - Cyrillic character (per ``is_cyrillic``)
      ' ' - character found in ``ignored``
      '1' - anything else
    """
    def classify(ch):
        # The Cyrillic test takes precedence over the ignored-set lookup.
        if is_cyrillic(ch):
            return '0'
        return ' ' if ch in ignored else '1'

    return ''.join(classify(ch) for ch in input)
def is_evil(input):
    """Return True if *input* has a short foreign run glued to Cyrillic text.

    The text is first reduced with ``biterate``.  A match is a run of one or
    two '1' markers that touches a '0' on at least one side, where the other
    side is a '0', a ' ', or the string boundary.  Runs of three or more '1'
    markers never match.
    """
    pattern = r'01{1,2}(0| |$)|(0| |^)1{1,2}0'
    return re.search(pattern, biterate(input)) is not None
def main(argv): | |
db = sqlsoup.SQLSoup('mysql://root@localhost/kloop') | |
if FLAGS.limit: | |
rows = db.tenders_raw.limit(FLAGS.limit) | |
else: | |
rows = db.tenders_raw.all() | |
for row in rows: | |
title = row.title.decode('utf8') | |
if has_non_cyrillic(title) and is_evil(title): | |
print int(row.amount), row.tender_num, colorize(title) | |
if __name__ == '__main__':
    # Calling the gflags registry parses the command line (populating
    # FLAGS.limit); its return value is handed to main() as argv.
    argv = FLAGS(sys.argv)
    main(argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment