Last active
May 2, 2019 14:37
-
-
Save theprogrammerin/e3206a4ec7a7a4086ac2 to your computer and use it in GitHub Desktop.
Mysql slow query [file] log parser. It combines multi line log into a single line. Also adds a generalised query be replacing query data with 'XXX' which can then be used to identify the slow query pattern.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# | |
# Ashutosh Agrawal | |
# http://blog.theprogrammer.in | |
# | |
# 2014-11-20 v1.0 | |
# | |
# This script is modified version of SQL parser written by | |
# | |
# Nathanial Hendler | |
# http://retards.org/ | |
# | |
# This perl script parses a MySQL slow_queries log file | |
# ignoring all queries less than $min_time and prints | |
# the required data in a signle line, for easy parsing of it | |
# by any log parser. | |
# | |
# | |
# Usage: mysql_slow_log_parser logfile | |
# | |
# ------------------------ | |
# SOMETHING TO THINK ABOUT (aka: how to read output) | |
# ------------------------ | |
# | |
# Also, it does to regex substitutions to normalize | |
# the queries... | |
# | |
# $query_string =~ s/\d+/XXX/g; | |
# $query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g; | |
# | |
# These replace numbers with XXX and strings found in | |
# quotes with XXX so that the same select statement | |
# with different WHERE clauses will be considered | |
# as the same query. | |
# | |
# so these... | |
# | |
# SELECT * FROM offices WHERE office_id = 3; | |
# SELECT * FROM offices WHERE office_id = 19; | |
# | |
# become... | |
# | |
# SELECT * FROM offices WHERE office_id = XXX; | |
# | |
# | |
# And these... | |
# | |
# SELECT * FROM photos WHERE camera_model LIKE 'Nikon%'; | |
# SELECT * FROM photos WHERE camera_model LIKE '%Olympus'; | |
# | |
# become... | |
# | |
# SELECT * FROM photos WHERE camera_model LIKE 'XXX'; | |
# | |
# | |
# --------------------- | |
# THIS MAY BE IMPORTANT (aka: Probably Not) | |
# --------------------- | |
# | |
# *SO* if you use numbers in your table names, or column | |
# names, you might get some oddities, but I doubt it. | |
# I mean, how different should the following queries be | |
# considered? | |
# | |
# SELECT car1 FROM autos_10; | |
# SELECT car54 FROM autos_11; | |
# | |
# I don't think so. | |
# | |
$min_time = 0; # Skip queries less than $min_time | |
$min_rows = 0; | |
$max_display = 10; # Truncate display if more than $max_display occurances of a query | |
print "\n Starting... \n"; | |
$query_string = ''; | |
$time = 0; | |
$new_sql = 0; | |
$user = ''; | |
$host = ''; | |
############################################## | |
# Loop Through The Logfile | |
############################################## | |
while (<>) { | |
# Skip Bogus Lines | |
next if ( m|/.*mysqld, Version:.+ started with:| ); | |
next if ( m|Tcp port: \d+ Unix socket: .*mysql.sock| ); | |
next if ( m|Time\s+Id\s+Command\s+Argument| ); | |
next if ( m|administrator\s+command:| ); | |
# if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s/ ) { | |
#if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s+Rows_examined:\s+(\d+)/ ) { | |
if ( /User\SHost:\s+([\S]*)\s+\S\s+\S*\s+\[+(.*)\][\s+\S]*/ ) { | |
# Try This if the above doesn't work | |
# if ( /User\SHost:\s+(.*)\s\S\s+\[+(.*)\][\s+\S]*/ ) { | |
$user = $1; | |
$host = $2; | |
next; | |
} | |
if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s+Rows_examined:\s+(.*)/ ) { | |
$time = $1; | |
$rows = $3; | |
$new_sql = 1; | |
# print "found $1 $3\n"; | |
next; | |
} | |
if ( /^\#/ && $query_string ) { | |
if (($time > $min_time) && ($rows >= $min_rows)) { | |
$orig_query = $query_string; | |
$query_string =~ s/\d+/XXX/g; | |
$query_string =~ s/'([^'\\]*(\\.[^'\\]*)*)'/'XXX'/g; | |
$query_string =~ s/"([^"\\]*(\\.[^"\\]*)*)"/"XXX"/g; | |
#$query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g; | |
#$query_string =~ s/\s+/ /g; | |
#$query_string =~ s/\n+/\n/g; | |
$users{$query_string} = $user; | |
$hosts{$query_string} = $host; | |
push @{$queries{$query_string}}, $time; | |
push @{$queries_rows{$query_string}}, $rows; | |
$queries_tot{$query_string} += $time; | |
$queries_orig{$query_string} = $orig_query; | |
$query_string = ''; | |
} | |
} else { | |
if ($new_sql) { | |
$query_string = $_; | |
$new_sql = 0; | |
} else { | |
$query_string .= $_; | |
} | |
} | |
} | |
############################################## | |
# Display Output | |
############################################## | |
foreach my $query ( sort { $queries_tot{$b} <=> $queries_tot{$a} } keys %queries_tot ) { | |
my $total = 0; | |
my $cnt = 0; | |
my @seconds = sort { $a <=> $b } @{$queries{$query}}; | |
my @rows = sort { $a <=> $b } @{$queries_rows{$query}}; | |
($total+=$_) for @seconds; | |
($cnt++) for @seconds; | |
print "Database: $ENV{DATABASE}"; | |
print " | User: ".$users{$query}; | |
print " | Host: ".$hosts{$query}; | |
print " | NoQuery: ".@{$queries{$query}}; | |
print " | TotalTime: " . $total; | |
print " | AverageTime: ".($total/$cnt); | |
print " | TimeTaken: "; | |
print @seconds > $max_display ? "$seconds[0] to $seconds[-1]" : sec_joiner(\@seconds); | |
print " seconds to complete "; | |
print " | RowsAnalyzed: "; | |
print @rows > $max_display ? "$rows[0] - $rows[-1]": sec_joiner(\@rows); | |
$orignal = $query; | |
$query =~ s/\n/ /g; | |
$queries_orig{$orignal} =~ s/\n/ /g; | |
print " | Pattern: $query"; | |
print " | Orignal: "; | |
print $queries_orig{$orignal}."\n"; | |
} | |
sub sec_joiner { | |
my ($seconds) = @_; | |
$string = join(", ", @{$seconds}); | |
$string =~ s/, (\d+)$/ and $1/; | |
return $string; | |
} | |
exit(0); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The grouping in the associative array could be a bit better. I have 4 slow queries greater than 2 seconds, but I only get 3 returned in groups of 2 and 1. Adding
next if ( m|^use.*| );
merges those two together, and removing the twonext;
within theUser
andQuery_time
conditionals properly reintroduces my missing 4th query.All 4 queries are of the same pattern, so it was curious behavior to see one missing initially.