Just hacking around with a log-parsing script.
Created
July 21, 2011 23:31
-
-
Save robcthegeek/1098498 to your computer and use it in GitHub Desktop.
Log Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configuration for Log File Parser.
# If you are unsure how to edit YAML, check out the Wikipedia page; it's pretty good:
# http://en.wikipedia.org/wiki/YAML | |
# Core Parser Configuration - Actually defines how it works. | |
# Add categories to the categories element in the format of "Category Name", which | |
# then contains a list of strings that will be used as regular expressions (case insensitive). | |
parser:
  # The marker *MUST* return groups for day, month, year, hours, minutes, seconds for timestamp creation.
  # It is single-quoted on purpose: inside a double-quoted YAML scalar "\d" is an invalid escape sequence.
  # The ".*?" between date and time is lazy so the first time-like token after the date is used.
  marker: '(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})'
  # Each list item maps one category name to the list of (case-insensitive) regular expressions that select it.
  categories:
    - "Database Errors":
        - "NHibernate"
        - "ADO.NET"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module LogParser | |
# Assigns log entries to named categories by testing each entry's stack text
# against the matchers of every configured category.
class EntryCategorizer
  # The live list of categories; callers may append to it after construction.
  attr_reader :categories

  # categories - list of category objects, each responding to +name+ and
  #              +matchers+. The list is copied; nil is treated as empty.
  def initialize(categories = [])
    @categories = [*categories]
  end

  # Finds the categories that apply to +entry+.
  #
  # A category applies when at least one of its matchers matches entry.stack.
  # Each applicable category name is reported once, and is also appended to
  # entry.categories (without duplicating names already recorded there).
  #
  # entry - object responding to +stack+. If it has no +categories+ reader,
  #         one is added to its class so the names can be recorded.
  #
  # Returns an Array of matching category names, in configuration order.
  def categorize(entry)
    ensure_categories_accessor(entry)
    stack = entry.stack.to_s

    matched = @categories.select do |cat|
      cat.matchers.any? { |matcher| stack.match(matcher) }
    end
    names = matched.map { |cat| cat.name }

    names.each do |name|
      entry.categories << name unless entry.categories.include?(name)
    end
    names
  end

  private

  # Adds a lazily-initialised +categories+ reader to the entry's class, but
  # only when the entry does not already provide one.
  def ensure_categories_accessor(entry)
    return if entry.respond_to?(:categories)

    entry.class.class_eval do
      def categories
        @categories ||= []
      end
    end
  end
end
# Pairs a category name with the matchers that select it.
class Category
  attr_reader :name
  attr_reader :matchers

  # name     - the category's label.
  # matchers - zero or more patterns, gathered into an Array.
  def initialize(name, *matchers)
    @name, @matchers = name, matchers
  end
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module LogParser | |
require 'date' | |
# One parsed log entry: the timestamp taken from its marker line and the full
# text (marker line plus following lines) that belongs to it.
class LogEntry
  attr_reader :datetime, :stack

  # datetime - timestamp of the entry.
  # stack    - text of the entry.
  def initialize(datetime, stack)
    @datetime = datetime
    @stack = stack
  end

  # Names of the categories assigned to this entry; empty until something
  # appends to it. Defined here so it is available before any categorizer
  # has touched the class.
  def categories
    @categories ||= []
  end
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Put this file's directory on the load path so the components under
# log_parser/ can be required by name.
$LOAD_PATH.push(File.dirname(__FILE__))

module LogParser
  # Requires one component from the log_parser/ directory by its base name.
  def self.req(file)
    require "log_parser/#{file}"
  end

  # Load order is kept as: parser, log_entry, entry_categorizer.
  %w[parser log_entry entry_categorizer].each { |component| req component }
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module LogParser | |
# Splits raw log text into LogEntry objects. A line matching the configured
# marker starts a new entry; every following non-marker line is appended to
# that entry's stack text.
class Parser
  # Marker used when the configuration does not supply one. The gap between
  # date and time is matched lazily so the first time-like token after the
  # date is taken, not the last one on the line.
  DEFAULT_MARKER = /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i

  # config - Hash with a :marker Regexp that provides the named groups
  #          year, month, day, hours, minutes and seconds. When nil, or when
  #          the :marker key is absent, DEFAULT_MARKER is used.
  def initialize(config = nil)
    @config = { :marker => DEFAULT_MARKER }.merge(config || {})
  end

  # Parses log text into entries.
  #
  # logText - String holding the log contents; nil or "" yields no entries.
  #
  # Returns an Array of LogEntry, one per marker line, in input order.
  # Raises whatever DateTime.new raises if the captured values do not form a
  # valid date/time.
  def parse(logText)
    return [] if logText.nil? || logText.empty?

    entries = []
    marker_match = nil
    stack = ""

    logText.each_line do |line|
      match = line.match(@config[:marker])
      if match
        # A new marker closes the entry that was being collected.
        entries << build_entry(marker_match, stack) if marker_match
        # Drop the line terminator; continuation lines bring their own "\n".
        stack = line.chomp
        marker_match = match
      elsif marker_match
        text = line.strip
        stack = "#{stack}\n#{text}" unless text.empty?
      end
      # Lines that appear before the first marker belong to no entry.
    end

    # Add the first (and only) or last item.
    entries << build_entry(marker_match, stack) if marker_match
    entries
  end

  private

  # Builds the LogEntry for a marker match and its collected text.
  def build_entry(match, stack)
    LogEntry.new(datetime_from_marker(match), stack)
  end

  # Converts the marker's named groups into a DateTime.
  def datetime_from_marker(match)
    parts = [:year, :month, :day, :hours, :minutes, :seconds].map { |group| match[group].to_i }
    DateTime.new(*parts)
  end
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "rake" | |
require "rake/testtask" | |
require "yaml" | |
# Output directory; create_clean_output_dir removes and recreates it.
OUTPUT_DIR = "output"

# The Rakefile's own directory goes on the load path so that
# 'lib/log_parser' can be required further down.
root = File.dirname(__FILE__)
$LOAD_PATH.push(root)

task :default => %w[test:all]

namespace :test do
  desc "Run all unit tests"
  Rake::TestTask.new(:all) do |test|
    test.libs << ["lib", "test"]
    test.test_files = Dir.glob("test/**/test_*.rb")
  end
end
namespace :parse do | |
require 'lib/log_parser' | |
desc "Parse all *.log files in the current directory"
# Finds every *.log / *.log.* file below the Rakefile's directory, parses each
# one, categorises its entries and finally writes the summary.
task :all do
  glob_pattern = File.join(root, "**", "*{.log,.log.*}")
  if Dir.glob(glob_pattern).any?
    @summary = {}
    config = load_config
    parser = LogParser::Parser.new(config)
    categorizer = LogParser::EntryCategorizer.new(config[:categories])
    create_clean_output_dir
    # The glob is evaluated again here, after the output directory was recreated.
    Dir.glob(glob_pattern).each do |log_file|
      p "Parsing '#{File.basename(log_file)}'"
      entries = parser.parse(file_contents(log_file))
      p "Categorizing #{formatted(entries.count)} entries..."
      entries.each { |entry| categorise(categorizer, entry) }
    end
    summarise
  else
    p "No log files to process."
  end
end
desc "Parse current Live logs"
# Copies the live site log into a local "Live" directory.
task :current do
  # TODO: Retrieve the Current Logs from the Live Servers
  # TODO: Glob and Parse Those
  # Dir.mkdir raises Errno::EEXIST when the directory is already present, so
  # only create it when it is missing; this keeps the task re-runnable.
  Dir.mkdir("Live") unless File.directory?("Live")
  # NOTE(review): "192.168/110.252" looks like a mistyped host
  # (192.168.110.252?) - confirm the share path before relying on this copy.
  system "cp //192.168/110.252/GG.Web.Website/V3MainSite.log Live/"
end
private | |
# Returns the configuration produced by config_from_file, or the result of
# default_config when config_from_file yields nothing.
def load_config
  config_from_file || default_config
end
# Reads the parser configuration from 'config.yml' in the working directory.
#
# Expects a top-level "parser" mapping with a "marker" regex string and an
# optional "categories" list whose items map a category name to a list of
# regex strings. Category regexes are compiled case-insensitively.
#
# Returns nil when config.yml does not exist, otherwise a Hash with
# :marker (Regexp) and :categories (Array of LogParser::Category).
def config_from_file
  # File.exist? is used because the File.exists? alias was removed in Ruby 3.2.
  return nil unless File.exist?('config.yml')

  settings = YAML.load(File.read('config.yml'))["parser"]
  categories = []
  # Array() lets a config without a "categories" key load with no categories.
  Array(settings["categories"]).each do |definition|
    definition.each do |name, matchers|
      patterns = Array(matchers).map { |exp| Regexp.new(exp, Regexp::IGNORECASE) }
      categories << LogParser::Category.new(name, *patterns)
    end
  end

  {
    :marker => Regexp.new(settings["marker"]),
    :categories => categories
  }
end
# Announces that no config file was found and returns the built-in
# configuration: the standard "dd-mm-yyyy ... hh:mm:ss" marker and a single
# "Errors" category matching /error/i.
def default_config
  puts "No config file found - using default configuration."
  {
    # Lazy ".*?" so the first time-like token after the date is the timestamp.
    :marker => /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*?(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i,
    # Categories must be Category objects: the categorizer calls #name and
    # #matchers on each element, which a bare [name, regex] array lacks.
    :categories => [LogParser::Category.new("Errors", /error/i)]
  }
end
# Shorthand for building a LogParser::Category with a single matcher.
def category(name, matcher)
  category_class = LogParser::Category
  category_class.new(name, matcher)
end
# Reads the whole file in binary mode and returns its contents.
# The block form of File.open closes the handle as soon as the read is done,
# so handles do not pile up while many log files are processed.
def file_contents(file)
  File.open(file, 'rb') { |io| io.read }
end
# Categorises one entry and, for every category name returned, records the
# entry via track and bumps that category's counter via summary.
def categorise(categorizer, entry)
  categorizer.categorize(entry).each do |name|
    track(name, entry)
    summary(name)
  end
end
# Appends the entry's stack text, followed by a 50-dash separator line, to
# output/<category>.txt, creating the output directory when it is missing.
def track(category, entry)
  # File.directory? replaces Dir.exists?, which was removed in Ruby 3.2.
  Dir.mkdir("output") unless File.directory?("output")
  # Block form closes (and therefore flushes) the file after every entry.
  File.open("output/#{category}.txt", "a") do |f|
    f.puts entry.stack
    f.puts "-" * 50
  end
end
# Increments the counter kept for +category+ in @summary, starting from zero
# the first time a category is seen. Returns the new count.
def summary(category)
  seen_so_far = @summary[category] || 0
  @summary[category] = seen_so_far + 1
end
# Appends a "Summary of Errors" section to output/summary.txt with one
# "<category>: <count>" line per entry in @summary, creating the output
# directory when it is missing.
def summarise
  p "Summarising Errors"
  # File.directory? replaces Dir.exists?, which was removed in Ruby 3.2.
  Dir.mkdir("output") unless File.directory?("output")
  # Block form guarantees the summary is flushed and the handle closed.
  File.open("output/summary.txt", "a") do |f|
    f.puts "Summary of Errors"
    f.puts "-----------------\n"
    @summary.each do |cat, count|
      f.printf("%-25s %s\n", "#{cat}:", formatted(count))
    end
  end
end
# Removes OUTPUT_DIR together with anything inside it (a missing directory is
# not an error) and then creates it again, empty.
def create_clean_output_dir
  require 'fileutils'
  target = OUTPUT_DIR
  FileUtils.rm_r(target, :force => true)
  Dir.mkdir(target)
end
# Returns +st+ as a String with a comma inserted after every digit that is
# followed by one or more complete groups of three digits.
def formatted(st)
  digit_before_triples = /(\d)(?=(\d\d\d)+(?!\d))/
  st.to_s.gsub(digit_before_triples) { "#{$1}," }
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error | |
Stack Trace: | |
Class1::Method | |
Class2::Method | |
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error | |
Stack Trace: | |
Class1::Method | |
Class2::Method | |
11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error | |
Stack Trace: | |
Class1::SampleMethod | |
12-03-2011 12:21:50 [38] ERROR SomeWebsite.Controller - Unexpected Error | |
Stack Trace: | |
Class1::Method | |
Class2::Method | |
14-02-2011 18:55:07 [15] ERROR Imaging - Unexpected Fatal Exception Occurred in Image Resizer. Returning Single Pixel | |
System.Threading.ThreadAbortException: Thread was being aborted. | |
at System.Threading.Thread.AbortInternal() | |
at System.Threading.Thread.Abort(Object stateInfo) | |
at System.Web.HttpResponse.End() | |
at Imaging.ConvertImageToOutputStream(HttpContext context, QueryString qs) | |
at Imaging.ProcessRequest(HttpContext context) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "test/unit" | |
require "log_parser" | |
# Unit tests for LogParser::EntryCategorizer, run against two overlapping
# categories: "Database Errors" and the broader "Generic Errors".
class TestEntryCategorizer < Test::Unit::TestCase
  require "date"

  # Builds an empty categorizer and appends both categories to it.
  def setup
    @categorizer = LogParser::EntryCategorizer.new
    @db_category = LogParser::Category.new("Database Errors", /database error/i)
    @generic_category = LogParser::Category.new("Generic Errors", /error/i)
    [@db_category, @generic_category].each { |cat| @categorizer.categories << cat }
  end

  def test_categorize_entry_matches_one_sets_category
    found = @categorizer.categorize(entry_with("Unexpected Database Error"))
    assert found.include?("Database Errors")
  end

  def test_categorize_entry_does_not_match_returns_empty
    found = @categorizer.categorize(entry_with("Unmatched Message"))
    assert found.empty?
  end

  def test_categorize_entry_matches_adds_categories_attribute
    entry = entry_with("Unexpected Database Error")
    @categorizer.categorize(entry)
    ["Database Errors", "Generic Errors"].each do |expected|
      assert entry.categories.include?(expected)
    end
  end

  private

  # Creates a log entry with a fixed timestamp string and the given text.
  def entry_with(stack)
    LogParser::LogEntry.new("01-12-2011 14:30:45", stack)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "test/unit" | |
require "log_parser" | |
# Placeholder test case for file-based parsing; it currently only builds the
# parser fixture.
class TestLogParser < Test::Unit::TestCase
  # Creates a parser configured with the standard dd-mm-yyyy / hh:mm:ss marker.
  def setup
    marker = /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i
    @parser = LogParser::Parser.new({ :marker => marker })
  end

  # TODO: Tests for Reading from Files etc.
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "test/unit" | |
require "log_parser" | |
# Unit tests for LogParser::Parser#parse, driven by generated log text.
class TestLogParser < Test::Unit::TestCase
  # Creates a parser configured with the standard dd-mm-yyyy / hh:mm:ss marker.
  def setup
    config = {
      :marker => /(?<day>\d{2})-(?<month>\d{2})-(?<year>\d{4}).*(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/i
    }
    @parser = LogParser::Parser.new(config)
  end

  def test_responds_to_parse
    assert_respond_to @parser, :parse
  end

  def test_returns_empty_for_empty_string
    assert @parser.parse("").empty?
  end

  def test_returns_one_for_one_error
    result = @parser.parse(create_errors_log)
    assert_equal 1, result.count
  end

  def test_returns_two_for_two_errors
    result = @parser.parse(create_errors_log(2))
    assert_equal 2, result.count
  end

  def test_returns_logentry_for_each_item
    result = @parser.parse(create_errors_log)
    assert_instance_of LogParser::LogEntry, result[0]
  end

  def test_populates_stacktrace_on_logentry
    entry = @parser.parse(create_errors_log)[0]
    # Parentheses avoid the "ambiguous first argument" warning for regex
    # literals; the dot is escaped so it only matches a literal ".".
    assert_match(/SomeWebsite\.Controller/, entry.stack)
    assert_match(/Class1::Method/i, entry.stack)
    assert_match(/Class2::Method/i, entry.stack)
  end

  def test_keeps_stacks_seperate_for_each_logentry
    # Added as BugFix, Stack Traces were being carried between Log Entries.
    entry = @parser.parse(create_errors_log(5))[4]
    assert_nil(entry.stack =~ /\(1\)/i, "Stack Trace Contains Info From First Error Entry")
  end

  def test_stack_trace_only_contains_header_line_once
    # Added as BugFix, Stack Traces Duplicated the First Line (The 'yyyy-mm-dd' Marker)
    entry = @parser.parse(create_errors_log(5))[4]
    assert_equal 1, entry.stack.scan(/SomeWebsite\.Controller/i).count,
                 "More Than One Instance of Header Text Found in Stack Trace"
  end

  def test_sets_logentry_datetime
    entry = @parser.parse(create_errors_log(100))[0]
    # Plain 3, not 03: a leading zero makes an octal literal.
    assert_equal DateTime.new(2011, 3, 11, 12, 20, 44), entry.datetime
  end

  def test_uses_marker_from_config
    text = "20111231-122044 - Some Error\n"
    marker_exp = /(?<year>\d{4})(?<month>\d{2})(?<day>\d{2})-(?<hours>\d{2})(?<minutes>\d{2})(?<seconds>\d{2})/i
    parser = LogParser::Parser.new({ :marker => marker_exp })
    assert_equal 1, parser.parse(text).count
  end

  private

  # Builds log text holding +num+ error entries. Each entry is numbered in
  # its header line and its "Stack Trace" line so tests can tell them apart.
  def create_errors_log(num = 1)
    output = ""
    (1..num).each do |i|
      output << "11-03-2011 12:20:44 [38] ERROR SomeWebsite.Controller - Unexpected Error(#{i})\n"
      output << "Stack Trace(#{i}):\n"
      output << "Class1::Method\n"
      output << "Class2::Method\n"
    end
    output
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment