Skip to content

Instantly share code, notes, and snippets.

@corvax19
Last active August 29, 2015 14:05
Show Gist options
  • Save corvax19/4d8b9a9661e1dcd775d3 to your computer and use it in GitHub Desktop.
Save corvax19/4d8b9a9661e1dcd775d3 to your computer and use it in GitHub Desktop.
Extract SQL statements from Siebel logfiles
#!/usr/bin/awk -f
function help() {
print "Scripts detects and optionally extracts SQL statements among with "
print "binding variables and execution time from Siebel component's logfile."
print "Extraction is much slower - where applicable, run detection first"
print "to confirm there are interesting statements in the log."
print "TOP5 identified by following criterias:"
print " - most frequent statement"
print " - slowest statement"
print " - most time consuming statement (accumulated execution time)"
print
print "Usage: SQLextract [-v [exectime=N][dump=P][animation=1]] <logfiles>"
print "where N - min execution time (in sec) to extract (quicker statements are ignored)"
print " P - path where to dump extracted statements"
print
print "Samples:"
print "SQLextract *.log"
print " would detect and report all statements from all available log files"
print
print "SQLextract -v exectime=10 *.log"
print " would detect and report statements running longer than 10 sec from all available log files"
print
print "SQLextract -v exectime=10 dump=. *.log"
print " would detect, report statements running longer than 10 sec from all available log files"
print " and extract those statements into separate files in current dir"
print
print "SQLextract -v trace=1 *.log"
print " process could be long and boring - some real-time stats streamed on STDERR"
print
print "By [email protected] 20140829"
showStats=0 # no stats should be shown
exit 0
}
function shorthelp() {
print "Usage: SQLextract [-v [exectime=N][dump=P]] <logfiles>"
print "Run 'SQLextract help' for longer help."
showStats=0 # no stats should be shown
exit 0
}
function sortTop(array, subject, top) {
for (i=1; i<=5; i++) {
maxValue = 0
for (c in array) if (maxValue < array[c]) { maxIndex = c; maxValue = array[c]}
printf ("MD5: %s %s: %s\n", maxIndex, subject, maxValue)
delete array[maxIndex]
}
}
BEGIN {
if (ARGC==1) shorthelp()
if (ARGC==2 && ARGV[1] ~ /^h/) help()
start_regex = "^ObjMgrSqlLog.*?(SELECT|UPDATE|INSERT|DELETE).*?statement with ID"
end_regex = "***** SQL Statement Execute Time"
dumpdir = dump ? "./"dump : "./"
statcount=-1
showStats=1 # should we dump stats at the end
}
/^$/ { linecount-- }
{ if (prevfile != FILENAME) {
prevfile = FILENAME
linecount = statcount = 0
if (trace) {
if (statcount>-1) printf("%d statements checked", statcount) > "/dev/stderr"
cmd="du -b "FILENAME; cmd|getline size; close(cmd)
printf("\nProcessing %s (%10.0fb) ", FILENAME, size) > "/dev/stderr"
}
}
linecount++
}
start_regex, end_regex {
if ($0 ~ start_regex) {
fromLine = linecount
ID = $NF
statement = $7
if (dump) buffer = ""
} else
if ($0 ~ end_regex) {
toLine = linecount
if (dump) buffer = buffer $0 "\n"
if (trace) statcount++
if ($(NF-2) > exectime) {
# calculating sql text's md5
gsub(/[[:space:]]+/, " ", SQLbuffer)
gsub("'", "\"", SQLbuffer)
md5Cmd = sprintf("echo -n '%s'|md5sum|awk '{print $1}'", toupper(SQLbuffer))
md5Cmd | getline md5
close(md5Cmd)
printf("{\"statement\":\"%s\",\"SQLID\"=\"%s\",\"md5\":\"%s\",\"ts\":\"%s\",\"sec\":\"%.3f\",\"file\":\"%s\",\"fromLine\":%d,\"toLine\":%d}\n",
statement, ID, md5, ts, $(NF-2), FILENAME, fromLine, toLine)
hashCount[md5]++
hashAccum[md5]+=$(NF-2)
if (hashLongest[md5] < $(NF-2)) hashLongest[md5] = $(NF-2)
if (dump) {
extractFilename = "_" FILENAME "_" md5 "_lines_" fromLine "-" toLine "_id"_ ID "_" $(NF-2) "_sec.sqlperf"
print buffer >> extractFilename;
close(extractFilename)
}
}
SQLbuffer = ""
}
if ($0 ~ /^ObjMgrSqlLog.*?[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}[[:blank:]][[:digit:]]{2}(:[[:digit:]]{2}){2}[[:blank:]]+$/) {
ts=$(NF-1)" "$NF
}
if (dump) buffer = buffer $0 "\n" # print >> "temp"counter
}
/^SELECT/,/^ObjMgrSqlLog/ {
if ($0 !~ /^ObjMgrSqlLog/) { SQLbuffer = SQLbuffer $0 }
}
END {
if (showStats!=0) {
print "TOP5 most frequent statements:"
sortTop(hashCount, "count", 5)
print "TOP5 slowest statements:"
sortTop(hashLongest, "count", 5)
print "TOP5 time consuming statements:"
sortTop(hashAccum, "count", 5)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment