Skip to content

Instantly share code, notes, and snippets.

@robcthegeek
Created July 21, 2011 23:31
Show Gist options
  • Save robcthegeek/1098498 to your computer and use it in GitHub Desktop.
Save robcthegeek/1098498 to your computer and use it in GitHub Desktop.
Log Parser

Log Parser

Just hacking around with a log-parsing script.

# Configuration for Log File Parser.
# If you are unsure how to edit YAML, check out the Wikipedia page; it's pretty good:
# http://en.wikipedia.org/wiki/YAML
# Core Parser Configuration - Actually defines how it works.
# Add categories to the categories element in the format of "Category Name", which
# then contains a list of strings that will be used as regular expressions (case insensitive).
parser:
  # The marker *MUST* return groups for day, month, year, hours, minutes, seconds for timestamp creation.
  # It is single-quoted on purpose: inside a double-quoted YAML scalar a
  # backslash starts an escape sequence and "\d" is not a valid one.
  # The gap between date and time is matched lazily (.*?) so the first time
  # after the date is used, even if the message contains another hh:mm:ss.
  marker: '(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})'
  categories:
    - "Database Errors":
        - "NHibernate"
        - "ADO.NET"
module LogParser
# Assigns category names to log entries by matching each entry's stack text
# against the matchers of the configured categories.
class EntryCategorizer
  # The mutable list of category objects consulted by #categorize. Callers
  # may append further categories to it directly.
  attr_reader :categories

  # categories - a list of category objects (nil means "none"). Each category
  #              must respond to #name and #matchers.
  def initialize(categories = [])
    # Splat into a fresh array so nil becomes [] and the caller's array is
    # never shared with (or mutated through) this instance.
    @categories = [*categories]
  end

  # Works out which categories +entry+ belongs to.
  #
  # entry - an object responding to #stack; the stack text is tested against
  #         every matcher of every category with String#match.
  #
  # A category is reported once, however many of its matchers hit. The
  # matching names are also recorded on entry.categories (the reader is added
  # to the entry's class on first use if it does not exist yet); categories
  # that did not match are never recorded, and names already present on the
  # entry are not duplicated.
  #
  # Returns the Array of matching category names, in category order.
  def categorize(entry)
    stack = entry.stack
    matching = @categories.select do |cat|
      cat.matchers.any? { |matcher| stack.match(matcher) }
    end
    names = matching.map { |cat| cat.name }

    ensure_categories_reader(entry)
    entry.categories.concat(names - entry.categories)
    names
  end

  private

  # Gives the entry's class a lazily-initialised #categories reader, once,
  # instead of re-evaluating the definition for every matcher.
  def ensure_categories_reader(entry)
    return if entry.respond_to?(:categories)

    entry.class.class_eval do
      def categories
        @categories ||= []
      end
    end
  end
end
# A named group of patterns; an entry whose text matches one of the patterns
# is filed under the category's name.
class Category
  # The label reported for matching entries.
  attr_reader :name
  # Array holding every pattern passed to the constructor after the name.
  attr_reader :matchers

  # name     - the category label.
  # matchers - zero or more patterns, collected into an Array.
  def initialize(name, *matchers)
    @name, @matchers = name, matchers
  end
end
end
module LogParser
require 'date'
# One parsed log record: the timestamp taken from its marker line plus the
# full text (marker line and any continuation lines) that belongs to it.
class LogEntry
  # datetime - the value given at construction (the parser passes a DateTime).
  # stack    - the text given at construction.
  attr_reader :datetime, :stack

  def initialize(datetime, stack)
    @datetime, @stack = datetime, stack
  end
end
end
# Make this file's directory searchable so the "log_parser/..." requires
# below resolve.
$LOAD_PATH.push(File.dirname(__FILE__))

module LogParser
  # Requires one component file from the log_parser directory.
  def self.req(file)
    require "log_parser/#{file}"
  end

  # Load the components in the same order as before.
  %w[parser log_entry entry_categorizer].each { |component| req component }
end
module LogParser
# Splits raw log text into LogEntry objects. A new entry starts on every line
# that matches the configured marker; the lines that follow, up to the next
# marker line, belong to that entry.
class Parser
  # Marker used when the config supplies none: "dd-mm-yyyy ... hh:mm:ss".
  # The gap between date and time is matched lazily so the first time after
  # the date is taken, even if the message itself contains another hh:mm:ss.
  DEFAULT_MARKER = /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/

  # Named groups a marker must capture, in DateTime.new argument order.
  TIMESTAMP_GROUPS = [:year, :month, :day, :hours, :minutes, :seconds].freeze

  # config - optional Hash; :marker is the pattern that identifies the first
  #          line of an entry. A nil config, or one without :marker, falls
  #          back to DEFAULT_MARKER.
  def initialize(config = nil)
    @config = config || {}
    @marker = @config[:marker] || DEFAULT_MARKER
  end

  # Parses +logText+ into entries.
  #
  # logText - the complete log contents as a String (nil is treated as empty).
  #
  # Lines before the first marker line are ignored. The marker line is kept
  # without its line terminator; continuation lines are stripped, blank ones
  # are dropped, and the pieces are joined with a single "\n".
  #
  # Returns an Array of LogEntry, in file order.
  # Raises whatever DateTime.new raises when a marker captures an impossible
  # date or time.
  def parse(logText)
    return [] if logText.nil? || logText.empty?

    entries = []
    current_match = nil
    current_lines = []
    logText.each_line do |line|
      if (match = line.match(@marker))
        entries << build_entry(current_match, current_lines) if current_match
        current_match = match
        # chomp, not strip: drop only the terminator so the header is not
        # followed by an empty line (or a stray "\r") in the joined stack.
        current_lines = [line.chomp]
      elsif current_match
        text = line.strip
        current_lines << text unless text.empty?
      end
    end
    # Add the first (and only) or last item.
    entries << build_entry(current_match, current_lines) if current_match
    entries
  end

  private

  # Builds the entry for one marker match and the lines collected for it.
  def build_entry(match, lines)
    LogEntry.new(datetime_from_marker(match), lines.join("\n"))
  end

  # Converts the marker's named groups into a DateTime.
  def datetime_from_marker(match)
    DateTime.new(*TIMESTAMP_GROUPS.map { |group| match[group].to_i })
  end
end
end
require "rake"
require "rake/testtask"
require "yaml"
# Directory that receives the per-category reports and the summary.
OUTPUT_DIR = "output"

# Put the Rakefile's own directory on the load path so 'lib/log_parser' can
# be required further down.
root = File.dirname(__FILE__)
$:.push(root)

task :default => ["test:all"]

namespace :test do
  desc "Run all unit tests"
  Rake::TestTask.new(:all) do |test|
    # Assign a flat list: appending an Array with << would leave a nested
    # array inside libs ("lib" is already the documented default entry).
    test.libs = %w[lib test]
    test.test_files = Dir["test/**/test_*.rb"]
  end
end
namespace :parse do
require 'lib/log_parser'
desc "Parse all *.log files in the current directory"
# Finds every *.log / *.log.* file below the Rakefile's directory, parses and
# categorises each entry, then writes the per-category files and the summary.
task :all do
  files = Dir.glob(File.join(root, "**", "*{.log,.log.*}"))
  if files.empty?
    # puts rather than p: p prints the inspected String, quotes included.
    puts "No log files to process."
  else
    @summary = {}
    config = load_config
    parser = LogParser::Parser.new(config)
    categorizer = LogParser::EntryCategorizer.new(config[:categories])
    create_clean_output_dir
    # Reuse the list found above instead of globbing the tree a second time.
    files.each do |file|
      puts "Parsing '#{File.basename(file)}'"
      entries = parser.parse(file_contents(file))
      puts "Categorizing #{formatted(entries.count)} entries..."
      entries.each { |entry| categorise(categorizer, entry) }
    end
    summarise
  end
end
desc "Parse current Live logs"
# Copies the live site's log into ./Live. Parsing it is still to be done.
task :current do
  # TODO: Retrieve the Current Logs from the Live Servers
  # TODO: Glob and Parse Those
  # Guarded so a second run does not fail because the directory exists.
  Dir.mkdir("Live") unless File.directory?("Live")
  # NOTE(review): "//192.168/110.252/..." looks like a mistyped host
  # (192.168.110.252?) - confirm before relying on this task.
  copied = system "cp //192.168/110.252/GG.Web.Website/V3MainSite.log Live/"
  warn "Could not copy the live log file." unless copied
end
private
# Returns the configuration read from the config file when there is one,
# otherwise the built-in default configuration.
def load_config
  from_file = config_from_file
  return from_file if from_file

  default_config
end
# Reads 'config.yml' from the current directory and converts its "parser"
# section into the Hash the parser and categorizer expect.
#
# Returns nil when the file does not exist, otherwise a Hash with
#   :marker     - Regexp built from the "marker" string
#   :categories - Array of LogParser::Category, one per configured name, with
#                 every matcher compiled as a case-insensitive Regexp. A
#                 missing "categories" key yields an empty Array, and a
#                 single matcher string is accepted in place of a list.
def config_from_file
  # File.exist? - the File.exists? alias was removed in Ruby 3.2.
  return nil unless File.exist?('config.yml')

  # NOTE(review): the file is assumed to be a trusted local config; only
  # plain strings, lists and mappings are expected in it.
  document = YAML.load(File.read('config.yml'))
  config = document["parser"]

  categories = (config["categories"] || []).flat_map do |group|
    group.map do |name, matchers|
      patterns = Array(matchers).map { |exp| Regexp.new(exp, Regexp::IGNORECASE) }
      LogParser::Category.new(name, *patterns)
    end
  end

  {
    :marker => Regexp.new(config["marker"]),
    :categories => categories
  }
end
# Configuration used when no config file is present: the standard
# "dd-mm-yyyy ... hh:mm:ss" marker and a single "Errors" category.
#
# Returns a Hash with :marker (Regexp) and :categories (Array of category
# objects built with the category helper).
def default_config
  puts "No config file found - using default configuration."
  {
    # Lazy gap (.*?) so the first time after the date is captured even when
    # the message itself contains another hh:mm:ss.
    :marker => /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i,
    # Must be category objects: the categorizer calls #name and #matchers on
    # each element, which a bare [name, regexp] Array does not provide.
    :categories => [category("Errors", /error/i)]
  }
end
# Shorthand for building a LogParser::Category with a single matcher.
def category(name, matcher)
  category_class = LogParser::Category
  category_class.new(name, matcher)
end
# Reads the whole of +file+ in binary mode.
#
# file - path of the file to read.
#
# Returns the contents as a binary String. The block form of File.open is
# used so the handle is closed as soon as the read finishes instead of
# staying open until garbage collection.
def file_contents(file)
  File.open(file, 'rb') { |io| io.read }
end
# Categorises +entry+ and, for every category name returned by the
# categorizer, appends the entry to that category's file and bumps its count.
#
# Returns the Array of category names the categorizer produced.
def categorise(categorizer, entry)
  categorizer.categorize(entry).each do |label|
    track(label, entry)
    summary(label)
  end
end
# Appends an entry's stack text, followed by a 50-dash separator line, to the
# file kept for +category+ in OUTPUT_DIR.
#
# category - category name; used as the file's base name.
# entry    - object responding to #stack.
#
# The output directory is created if needed. The file is opened in block
# form so it is flushed and closed on every call rather than left open.
def track(category, entry)
  # File.directory? - Dir.exists? was removed in Ruby 3.2.
  Dir.mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
  File.open(File.join(OUTPUT_DIR, "#{category}.txt"), "a") do |f|
    f.puts entry.stack
    f.puts "-" * 50
  end
end
# Adds one to the running count kept for +category+ in @summary, starting
# from zero the first time a category is seen.
#
# Returns the new count.
def summary(category)
  seen_so_far = @summary[category] || 0
  @summary[category] = seen_so_far + 1
end
# Appends a heading and one "<category>: <count>" line per entry of @summary
# to summary.txt in OUTPUT_DIR, creating the directory if necessary.
#
# Counts are passed through formatted. The file is opened in block form so it
# is flushed and closed before the method returns.
def summarise
  # puts rather than p: p prints the inspected String, quotes included.
  puts "Summarising Errors"
  # File.directory? - Dir.exists? was removed in Ruby 3.2.
  Dir.mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
  File.open(File.join(OUTPUT_DIR, "summary.txt"), "a") do |f|
    f.puts "Summary of Errors"
    f.puts "-----------------\n"
    @summary.each do |cat, count|
      f.printf("%-25s %s\n", "#{cat}:", formatted(count))
    end
  end
end
# Starts every run from an empty output directory: removes OUTPUT_DIR with
# everything in it (if present) and creates it again.
def create_clean_output_dir
  require 'fileutils'
  target = OUTPUT_DIR
  FileUtils.rm_rf(target)
  Dir.mkdir(target)
end
# Renders +st+ as a String with a comma inserted after every digit that is
# followed by one or more complete groups of three digits.
def formatted(st)
  text = st.to_s
  text.gsub(/(\d)(?=(\d\d\d)+(?!\d))/) { "#{Regexp.last_match(1)}," }
end
end
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error
Stack Trace:
Class1::Method
Class2::Method
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error
Stack Trace:
Class1::Method
Class2::Method
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error
Stack Trace:
Class1::SampleMethod
12-03-2011 12:21:50 [38] ERROR SomeWebsite.Controller - Unexpected Error
Stack Trace:
Class1::Method
Class2::Method
14-02-2011 18:55:07 [15] ERROR Imaging - Unexpected Fatal Exception Occurred in Image Resizer. Returning Single Pixel
System.Threading.ThreadAbortException: Thread was being aborted.
at System.Threading.Thread.AbortInternal()
at System.Threading.Thread.Abort(Object stateInfo)
at System.Web.HttpResponse.End()
at Imaging.ConvertImageToOutputStream(HttpContext context, QueryString qs)
at Imaging.ProcessRequest(HttpContext context)
require "test/unit"
require "log_parser"
# Unit tests for LogParser::EntryCategorizer, using one specific
# ("Database Errors") and one generic ("Generic Errors") category.
class TestEntryCategorizer < Test::Unit::TestCase
  require "date"

  # Registers both categories on a fresh categorizer before every test.
  def setup
    @dbCategory = LogParser::Category.new("Database Errors", /database error/i)
    @genError = LogParser::Category.new("Generic Errors", /error/i)
    @categorizer = LogParser::EntryCategorizer.new
    [@dbCategory, @genError].each { |category| @categorizer.categories << category }
  end

  def test_categorize_entry_matches_one_sets_category
    names = @categorizer.categorize(entry_with("Unexpected Database Error"))
    assert(names.include?("Database Errors"))
  end

  def test_categorize_entry_does_not_match_returns_empty
    names = @categorizer.categorize(entry_with("Unmatched Message"))
    assert(names.empty?)
  end

  def test_categorize_entry_matches_adds_categories_attribute
    entry = entry_with("Unexpected Database Error")
    @categorizer.categorize(entry)
    assert(entry.categories.include?("Database Errors"))
    assert(entry.categories.include?("Generic Errors"))
  end

  private

  # Builds a log entry with a fixed timestamp string and the given stack text.
  def entry_with(stack)
    LogParser::LogEntry.new("01-12-2011 14:30:45", stack)
  end
end
require "test/unit"
require "log_parser"
# Placeholder test case for file-based parsing; it only prepares a parser.
class TestLogParser < Test::Unit::TestCase
  # Builds a parser configured with the standard "dd-mm-yyyy ... hh:mm:ss" marker.
  def setup
    marker = /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i
    @parser = LogParser::Parser.new({ :marker => marker })
  end
  # TODO: Tests for Reading from Files etc.
end
require "test/unit"
require "log_parser"
# Unit tests for LogParser::Parser, driven by generated log text in which
# every entry carries its own sequence number.
class TestLogParser < Test::Unit::TestCase
  # Builds a parser configured with the standard "dd-mm-yyyy ... hh:mm:ss" marker.
  def setup
    marker = /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i
    @parser = LogParser::Parser.new({ :marker => marker })
  end

  def test_responds_to_parse
    assert(@parser.respond_to?(:parse))
  end

  def test_returns_empty_for_empty_string
    entries = @parser.parse("")
    assert(entries.empty?)
  end

  def test_returns_one_for_one_error
    entries = @parser.parse(create_errors_log)
    assert(entries.count == 1)
  end

  def test_returns_two_for_two_errors
    entries = @parser.parse(create_errors_log(2))
    assert(entries.count == 2)
  end

  def test_returns_logentry_for_each_item
    entries = @parser.parse(create_errors_log)
    assert_instance_of(LogParser::LogEntry, entries[0])
  end

  def test_populates_stacktrace_on_logentry
    entry = @parser.parse(create_errors_log)[0]
    assert_match(/SomeWebsite.Controller/, entry.stack)
    assert_match(/Class1::Method/i, entry.stack)
    assert_match(/Class2::Method/i, entry.stack)
  end

  def test_keeps_stacks_seperate_for_each_logentry
    # Added as a bug fix: stack traces were being carried between log entries.
    fifth = @parser.parse(create_errors_log(5))[4]
    assert((fifth.stack =~ /\(1\)/i).nil?, "Stack Trace Contains Info From First Error Entry")
  end

  def test_stack_trace_only_contains_header_line_once
    # Added as a bug fix: stack traces duplicated the first (marker) line.
    fifth = @parser.parse(create_errors_log(5))[4]
    header_count = fifth.stack.scan(/SomeWebsite\.Controller/i).count
    assert(header_count == 1, "More Than One Instance of Header Text Found in Stack Trace")
  end

  def test_sets_logentry_datetime
    first = @parser.parse(create_errors_log(100))[0]
    assert(first.datetime == DateTime.new(2011, 3, 11, 12, 20, 44))
  end

  def test_uses_marker_from_config
    log = <<END_OF_TEXT
20111231-122044 - Some Error
END_OF_TEXT
    marker_exp = /(?<year>\d{4})(?<month>\d{2})(?<day>\d{2})-(?<hours>\d{2})(?<minutes>\d{2})(?<seconds>\d{2})/i
    custom_parser = LogParser::Parser.new({ :marker => marker_exp })
    assert(custom_parser.parse(log).count == 1)
  end

  private

  # Returns log text holding +num+ consecutive error entries, each tagged
  # with its 1-based index on the header and "Stack Trace" lines.
  def create_errors_log(num = 1)
    (1..num).each_with_object("") do |i, log|
      log << <<END_OF_TEXT
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error(#{i.to_s})
Stack Trace(#{i.to_s}):
Class1::Method
Class2::Method
END_OF_TEXT
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment