Last active
August 29, 2015 14:15
-
-
Save jeffgeiger/293b481232ddd5ff6913 to your computer and use it in GitHub Desktop.
Parse nginx logs with ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Parse nginx logs with ruby | |
# http://rubular.com/r/2wVbwiZPMS | |
# (?<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - .{0}- \[(?<day>[\d]{2})\/(?<mon>[\w]+)\/(?<yr>[\d]{4})\:(?<hr>[\d]{2})\:(?<min>[\d]{2})\:(?<sec>[\d]{2}) [^$]+\] "(?<method>GET|POST|PUT|DELETE) (?<uri>[^\s]+?) HTTP\/1\.1" (?<response>[\d]+) [\d]+ "(?<referrer>[^\s]+?)" "(?<agent>[^\"]+?)" | |
require 'sqlite3' | |
dbfile = ARGV[0] | |
db = SQLite3::Database.open dbfile | |
tblcreate = db.execute('CREATE TABLE IF NOT EXISTS weblogs ("id" integer primary key NULL, "ip" text NULL, "tstamp" text NULL, "method" text NULL, "uri" text NULL, "response" text NULL, "referrer" text NULL, "agent" text NULL)') | |
months = { 'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', | |
'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08', | |
'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' } | |
$stdin.each_line do |line| | |
_, ip, day, mon, yr, hr, min, sec, method, uri, response, referrer, agent = /(?<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - .{0}- \[(?<day>[\d]{2})\/(?<mon>[\w]+)\/(?<yr>[\d]{4})\:(?<hr>[\d]{2})\:(?<min>[\d]{2})\:(?<sec>[\d]{2}) [^$]+\] "(?<method>GET|POST|PUT|DELETE) (?<uri>[^\s]+?) HTTP\/1\.1" (?<response>[\d]+) [\d]+ "(?<referrer>[^\s]+?)" "(?<agent>[^\"]+?)"/.match(line).to_a | |
#YYYY-MM-DD HH:MM:SS | |
month = months[mon] | |
tstamp = "#{yr}-#{month}-#{day} #{hr}:#{min}:#{sec}" | |
stmt = db.prepare("INSERT INTO weblogs(ip, tstamp, method, uri, response, referrer, agent) VALUES(?, ?, ?, ?, ?, ?, ?)") | |
rs = stmt.execute ip, tstamp, method, uri, response, referrer, agent | |
end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Select records between date/times | |
select * from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00') | |
--Top response types by count | |
select response, count(id) as count from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00') group by response order by count desc limit 20; | |
--Top URI by count | |
select uri, count(id) as count from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00') group by uri order by count desc limit 20; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment