Last active
December 15, 2015 01:59
-
-
Save mjamesruggiero/5184068 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fri Mar 15 21:31:59 PDT 2013 | |
# want a histogram of queries | |
import re | |
import logging | |
import csv | |
def clean(line): | |
return remove_newlines(remove_times(sub_digits(line))) | |
def sub_digits(old_string, subst="x"): | |
"""substitute all digits with something else""" | |
return re.sub("\d+", subst, old_string) | |
def only_selects(lines): | |
"""filter out lines that are not SELECTs""" | |
for line in lines: | |
if "SELECT" in line: | |
yield unicode(line, "UTF-8") | |
def remove_times(log_line): | |
"""this could be better for the control chars""" | |
start_loc = log_line.find('SELECT') | |
end_loc = log_line.find(';') | |
return log_line[start_loc:end_loc] | |
def remove_newlines(old_string, subst=''): | |
return re.sub("\n", subst, old_string ) | |
def make_histogram(lines): | |
"""make an unsorted query count""" | |
d = {} | |
for line in lines: | |
d[line] = d.get(line, 0) + 1 | |
return d | |
def save_to_file(query_counts, threshold=0): | |
with open('repetitious_queries.csv', 'wa') as csvfile: | |
writer = csv.writer(csvfile) | |
for count, query in query_counts: | |
if count > threshold: | |
writer.writerow([query, count]) | |
if __name__ == '__main__': | |
FORMAT = '%(asctime)-15s %(message)s' | |
logging.basicConfig(format=FORMAT) | |
logger = logging.getLogger('repetitious_queries') | |
logfile = '/Users/engineer/workspace/Creatorious/log/development.log' | |
histogram = {} | |
with open(logfile) as f: | |
selects = only_selects(f) | |
cleaned = [clean(line) for line in selects] | |
histogram = make_histogram(cleaned) | |
# and sort it | |
keys = histogram.keys() | |
sorted_count = [ (histogram[k], k) for k in keys ] | |
sorted_count.sort() | |
sorted_count.reverse() | |
WRITE_THAT_FILE = False | |
SUBSTRING_SIZE = 75 | |
THRESHOLD = 10 | |
for count, query in sorted_count: | |
if count > THRESHOLD: | |
print("{0}\t{1}".format(count, query[:SUBSTRING_SIZE])) | |
if WRITE_THAT_FILE: | |
save_to_file(sorted_count, threshold=THRESHOLD) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment