Skip to content

Instantly share code, notes, and snippets.

@overplumbum
Created September 20, 2012 10:59
Show Gist options
  • Save overplumbum/3755232 to your computer and use it in GitHub Desktop.
Slow queries log benchmark automation
#!/usr/bin/env python
import csv
import psycopg2
from datetime import datetime
COLS = ('id', 'date', 'connection', 'database', 'user', 'duration', 'query')
IDX = dict(zip(COLS, range(len(COLS))))
IDX_QUERY = IDX['query']
conn = psycopg2.connect('')
cur = conn.cursor()
f = open("csv-query.txt", "rb")
lines = sum(1 for line in f)
report_each = max(lines / 100, 1)
f.seek(0)
ff = csv.reader(f)
start = datetime.utcnow()
i = 0
errors = 0
for row in ff:
query = row[IDX_QUERY]
try:
cur.execute(query)
x = cur.fetchall()
x = len(x)
except:
errors += 1
cur.close()
cur = conn.cursor()
if 0 == (i % report_each):
dt = datetime.utcnow() - start
print "{}% completed for {} with {} errors".format(100*i/lines, dt, errors)
i += 1
dt = datetime.utcnow() - start
print "time taken:", dt, "or", dt.total_seconds(), "seconds"
Usage — how csv-query.txt was generated from the syslog with pgFouine:
zcat syslog.2.gz | head -n 1000000 | pv -l -s 1000000 -ptaer | php pgfouine.php -file - -format text -report ../csv-query.txt=csv-query -onlyselect -logtype syslog -memorylimit 9000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment