Skip to content

Instantly share code, notes, and snippets.

@scottyallen
Created September 19, 2016 14:44
Show Gist options
  • Save scottyallen/f14fc17c913dcc9707e5a6123bd00ad9 to your computer and use it in GitHub Desktop.
Save scottyallen/f14fc17c913dcc9707e5a6123bd00ad9 to your computer and use it in GitHub Desktop.
import re
import sys
import gflags
import sqlsoup
# Optional cap on the number of rows main() reads; None (the default) makes
# main() read every row.
gflags.DEFINE_integer('limit', None, 'Max number of records to look at.')
FLAGS = gflags.FLAGS
# ANSI terminal escape sequences used by colorize(): RED switches the
# foreground to bright red, END resets the terminal attributes.
RED = '\033[91m'
END = '\033[0m'
# Characters treated as neutral (neither Cyrillic nor suspicious): space,
# double quote, hyphen, semicolon, no-break space (\xA0), comma, the
# guillemets (\xab, \xbb), ASCII digits, parentheses, period, the numero
# sign (\u2116), slash, newline, carriage return, colon, question mark,
# backslash, percent, asterisk, equals and plus.
ignored = set(u' "-;\xA0,\xab\xbb0123456789().\u2116/\n\r:?\\%*=+')
def colorize(input):
    """Return *input* UTF-8 encoded, with suspicious characters shown in red.

    Every character that is neither Cyrillic nor in ``ignored`` is wrapped
    individually in the ``RED`` ... ``END`` escape sequences; all other
    characters are emitted as-is.  Each character is encoded with
    ``.encode('utf8')`` before joining, so the result is a byte string
    under Python 2.
    """
    def paint(ch):
        # Cyrillic and neutral characters pass through without colouring.
        if is_cyrillic(ch) or ch in ignored:
            return ch.encode('utf8')
        return RED + ch.encode('utf8') + END

    return ''.join(paint(ch) for ch in input)
def has_non_cyrillic(input):
    """Return True if *input* contains at least one suspicious character.

    A character is suspicious when it is neither Cyrillic nor a member of
    ``ignored``; that test lives in ``is_interesting_char`` and is reused
    here rather than being spelled out a second time.  An empty *input*
    yields False.
    """
    return any(is_interesting_char(ch) for ch in input)
def is_cyrillic(c):
    """Return True if the single character *c* has a code point in
    the range U+0400 through U+04FF (inclusive)."""
    return 0x0400 <= ord(c) < 0x0500
def is_interesting_char(c):
    """Return True if *c* is neither Cyrillic nor one of the ``ignored``
    neutral characters."""
    return not (is_cyrillic(c) or c in ignored)
def biterate(input):
    """Map *input* to a same-length mask string, one symbol per character.

    '0' marks a Cyrillic character, ' ' marks a character from
    ``ignored``, and '1' marks anything else.
    """
    def classify(ch):
        if is_cyrillic(ch):
            return '0'
        return ' ' if ch in ignored else '1'

    return ''.join(classify(ch) for ch in input)
def is_evil(input):
    """Return True if *input* has a short foreign run attached to Cyrillic.

    The text is first reduced by ``biterate`` to a mask of '0' (Cyrillic),
    ' ' (ignored) and '1' (other).  The mask is flagged when a run of one
    or two '1's either
      * directly follows a '0' and is followed by '0', ' ' or end of
        string, or
      * directly precedes a '0' and is preceded by '0', ' ' or start of
        string.
    """
    mask = biterate(input)
    return re.search(r'01{1,2}(0| |$)|(0| |^)1{1,2}0', mask) is not None
def main(argv):
db = sqlsoup.SQLSoup('mysql://root@localhost/kloop')
if FLAGS.limit:
rows = db.tenders_raw.limit(FLAGS.limit)
else:
rows = db.tenders_raw.all()
for row in rows:
title = row.title.decode('utf8')
if has_non_cyrillic(title) and is_evil(title):
print int(row.amount), row.tender_num, colorize(title)
print
if __name__ == '__main__':
    # FLAGS is the module-level alias of gflags.FLAGS; calling it parses
    # the command line (per python-gflags it returns the arguments left
    # over after flag parsing).
    argv = FLAGS(sys.argv)
    main(argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment