Created
February 12, 2011 19:30
-
-
Save hukl/824029 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'time' | |
# Emits the entries of an access log that fall inside a five-minute window.
#
# Log lines are expected to carry a timestamp of the form
# "[12/Feb/2011:19:30:00 +0100]". To avoid parsing a large log from the start,
# the file is first scanned in big chunks with a coarse regular expression;
# only once a chunk mentions the requested window are individual lines parsed.
# The byte offset of the first relevant line is cached in a state file so that
# later runs can seek there directly.
class LogfileParser
  TIME_REGEXP = /\[\d{2}\/\w{3}\/\d{4}\:\d{2}:\d{2}:\d{2}\s.{5}\]/

  # strptime layout of the text matched by TIME_REGEXP.
  TIME_FORMAT = '[%d/%b/%Y:%H:%M:%S %z]'

  # Byte length of one timestamp; chunks overlap by this much (minus one) so a
  # timestamp split across a chunk boundary is still seen by the coarse scan.
  STAMP_BYTES = 28

  # Length of the emitted window in seconds.
  WINDOW_SECONDS = 300

  # Chunk size used by the coarse scan.
  BUFFER_SIZE = 128_000

  # Default location of the cached byte offset.
  STATEFILE = 'statefile'

  # @param path [String] path of the log file to read
  # @param starting_at [String] start of the window, in any format Time.parse accepts
  # @param statefile [String] file caching the byte offset of the first relevant line
  # @raise [ArgumentError] if the log file does not exist or the time is unparsable
  def initialize(path, starting_at, statefile: STATEFILE)
    raise ArgumentError, "log file not found: #{path}" unless File.exist?(path)

    # Parse before opening so a bad time string does not leak a file handle.
    @starting_at = Time.parse(starting_at)
    @min_timestamp = @starting_at
    @max_timestamp = @starting_at + WINDOW_SECONDS
    @statefile = statefile
    @bytes_to_skip = read_statefile
    @log = File.open(path)
  end

  # Locates the first entry at or after the window start, stores its byte
  # offset in the state file and leaves the log positioned at that entry.
  #
  # @return [Integer, nil] offset of the first relevant line, nil if none exists
  def search
    coarse = time_range_regexp
    filesize = @log.stat.size

    (0...filesize).step(BUFFER_SIZE) do |position|
      @log.seek(position, File::SEEK_SET)
      chunk = @log.read(BUFFER_SIZE + STAMP_BYTES - 1)
      next unless chunk && chunk =~ coarse

      # Step back one chunk (never before the start of the file) so that a
      # line straddling the chunk boundary is examined as a whole.
      offset = first_entry_offset([position - BUFFER_SIZE, 0].max)
      return offset ? remember(offset) : nil
    end
    nil
  end

  # Positions the log at the cached offset, or searches for it when there is
  # no usable cached value (missing, zero, or beyond the end of the file).
  def fast_forward
    if @bytes_to_skip > 0 && @bytes_to_skip <= @log.stat.size
      @log.seek(@bytes_to_skip, File::SEEK_SET)
    else
      search
    end
  end

  # Yields every log line whose timestamp lies within the window (both ends
  # inclusive). Lines without a timestamp are skipped. Reading stops at the
  # first entry past the window end.
  #
  # @yieldparam line [String] a raw log line, including its line terminator
  # @return [Enumerator] when called without a block
  def emit(&block)
    return enum_for(:emit) unless block

    fast_forward
    @log.each_line do |line|
      entry_time = entry_time_of(line)
      next unless entry_time
      # Strictly greater: several entries may share the boundary second.
      break if entry_time > @max_timestamp

      yield line if entry_time >= @min_timestamp
    end
  end

  private

  # Reads the cached offset; an absent, unreadable or empty state file means 0.
  def read_statefile
    File.open(@statefile) { |f| f.readline.to_i }
  rescue SystemCallError, IOError
    0
  end

  # Persists +offset+, positions the log there and returns it.
  def remember(offset)
    @bytes_to_skip = offset
    File.open(@statefile, 'w') { |f| f.write(offset) }
    @log.seek(offset, File::SEEK_SET)
    offset
  end

  # Builds the coarse filter: it matches any timestamp in one of the calendar
  # minutes touched by the window. It may match slightly more than the window;
  # the exact comparison happens per line afterwards.
  def time_range_regexp
    minutes = (0..WINDOW_SECONDS).step(60).map do |offset|
      Regexp.escape((@starting_at + offset).strftime('%d/%b/%Y:%H:%M'))
    end
    Regexp.new("\\[(?:#{minutes.uniq.join('|')}):\\d{2}\\s.{5}\\]")
  end

  # Scans line by line from byte +from+ and returns the offset at which the
  # first line with a timestamp at or after the window start begins.
  def first_entry_offset(from)
    if from > 0
      # Re-align to a line start: drop the rest of the line containing the
      # byte just before +from+ (only a newline if +from+ is already aligned).
      @log.seek(from - 1, File::SEEK_SET)
      @log.gets
    else
      @log.rewind
    end

    @log.each_line do |line|
      entry_time = entry_time_of(line)
      # pos is already past the line just read, so subtract its length.
      return @log.pos - line.bytesize if entry_time && entry_time >= @min_timestamp
    end
    nil
  end

  # Returns the timestamp of +line+ as a Time, or nil if it carries none.
  def entry_time_of(line)
    stamp = line[TIME_REGEXP]
    stamp && Time.strptime(stamp, TIME_FORMAT)
  end
end
# Command-line entry point: the script's arguments are handed to
# LogfileParser.new as-is, and every emitted line is printed to standard output.
LogfileParser.new(*ARGV).emit { |line| puts line }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment