Skip to content

Instantly share code, notes, and snippets.

@jeffgeiger
Last active August 29, 2015 14:15
Show Gist options
  • Save jeffgeiger/293b481232ddd5ff6913 to your computer and use it in GitHub Desktop.
Save jeffgeiger/293b481232ddd5ff6913 to your computer and use it in GitHub Desktop.
Parse nginx logs with ruby
#!/usr/bin/env ruby
# Parse nginx logs with ruby
# http://rubular.com/r/2wVbwiZPMS
# (?<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - .{0}- \[(?<day>[\d]{2})\/(?<mon>[\w]+)\/(?<yr>[\d]{4})\:(?<hr>[\d]{2})\:(?<min>[\d]{2})\:(?<sec>[\d]{2}) [^$]+\] "(?<method>GET|POST|PUT|DELETE) (?<uri>[^\s]+?) HTTP\/1\.1" (?<response>[\d]+) [\d]+ "(?<referrer>[^\s]+?)" "(?<agent>[^\"]+?)"
require 'sqlite3'
dbfile = ARGV[0]
db = SQLite3::Database.open dbfile
tblcreate = db.execute('CREATE TABLE IF NOT EXISTS weblogs ("id" integer primary key NULL, "ip" text NULL, "tstamp" text NULL, "method" text NULL, "uri" text NULL, "response" text NULL, "referrer" text NULL, "agent" text NULL)')
months = { 'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' }
$stdin.each_line do |line|
_, ip, day, mon, yr, hr, min, sec, method, uri, response, referrer, agent = /(?<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - .{0}- \[(?<day>[\d]{2})\/(?<mon>[\w]+)\/(?<yr>[\d]{4})\:(?<hr>[\d]{2})\:(?<min>[\d]{2})\:(?<sec>[\d]{2}) [^$]+\] "(?<method>GET|POST|PUT|DELETE) (?<uri>[^\s]+?) HTTP\/1\.1" (?<response>[\d]+) [\d]+ "(?<referrer>[^\s]+?)" "(?<agent>[^\"]+?)"/.match(line).to_a
#YYYY-MM-DD HH:MM:SS
month = months[mon]
tstamp = "#{yr}-#{month}-#{day} #{hr}:#{min}:#{sec}"
stmt = db.prepare("INSERT INTO weblogs(ip, tstamp, method, uri, response, referrer, agent) VALUES(?, ?, ?, ?, ?, ?, ?)")
rs = stmt.execute ip, tstamp, method, uri, response, referrer, agent
end
--Select records between date/times
select * from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00')
--Top response types by count
select response, count(id) as count from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00') group by response order by count desc limit 20;
--Top URI by count
select uri, count(id) as count from weblogs where datetime(tstamp) between datetime('2015-02-12 02:00:00') AND datetime('2015-02-12 03:00:00') group by uri order by count desc limit 20;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment