NicMcPhee · December 23, 2015 12:59 · NicMcPhee · Sep 20, 2013
diff --git a/GzipReader_TarReader_example.rb b/GzipReader_TarReader_example.rb
 require 'rubygems/package'
 require 'zlib'

 # This is intended as an example of how to use GzipReader and TarReader to process a
 # collection of gzipped tar files (.tgz files or .tar.gz files). Searching (as of Sept 2013)
 # for documentation of these libraries and examples of their use isn't terribly satisfactory,
 # so I thought I'd write up an example in the hopes that people find it helpful.

 # This assumes that a set of GZipped Tar files is provided as command line arguments,
 # and that these zipped tar files contain log data that includes (among other things)
 # information about some sort of failures (e.g., failed login attempts). Here we process
 # each file in each of the tar files, and count the number of lines that contain the
 # word "Failed", printing at the end the total number of such lines across all tar files.

 # Counts the lines of one archive entry that contain the word "Failed".
 #
 # file - any object responding to #read with no arguments (for example a
 #        Gem::Package::TarReader::Entry or an IO); the whole content is read.
 #
 # Returns the Integer number of matching lines, 0 when there is no content.
 def count_failures_in_file(file)
   # Not every reader returns "" when there is nothing to read:
   # Gem::Package::TarReader::Entry#read has returned nil for zero-length
   # entries in some RubyGems versions. to_s turns nil into "" so an empty
   # log file counts as zero failures instead of raising NoMethodError.
   contents = file.read.to_s
   # each_line iterates without first building an Array of every line, and
   # count with a block tallies the lines for which the match succeeds.
   contents.each_line.count { |line| line =~ /Failed/ }
 end

 # Counts the "Failed" lines across every regular file in one gzipped tar archive.
 #
 # tgz_file_name - path to a .tgz / .tar.gz file.
 #
 # Returns the Integer total of count_failures_in_file over all file entries.
 # Nothing is rescued here, so errors raised while opening or reading the
 # archive propagate to the caller.
 def count_failures_in_single_tar_file(tgz_file_name)
   count = 0
   # The block forms close each reader when the block exits, including when an
   # exception is raised part-way through the archive, so the underlying file
   # handle is not leaked on failure.
   Zlib::GzipReader.open(tgz_file_name) do |gzip_reader|
     # The GzipReader yields the uncompressed tar stream; the TarReader walks
     # that stream and hands back one Entry object per archive member.
     Gem::Package::TarReader.new(gzip_reader) do |tar_reader|
       tar_reader.each do |entry|
         # Directories and other non-file members are skipped.
         count += count_failures_in_file(entry) if entry.file?
       end
     end
   end
   # The tally lives in a local rather than relying on the return values of
   # the block-form constructors.
   count
 end

 # Totals the "Failed" line counts over a list of gzipped tar archives.
 #
 # tgz_file_names - a collection of archive paths (the script passes ARGV).
 #
 # Returns the sum of count_failures_in_single_tar_file over every path,
 # which is 0 when the collection is empty.
 def count_failures_in_tar_files(tgz_file_names)
   tgz_file_names.inject(0) do |running_total, archive_path|
     running_total + count_failures_in_single_tar_file(archive_path)
   end
 end

 # Script entry point: every command-line argument is passed along as an
 # archive path, and the combined "Failed" line count is printed.
 puts "There were #{count_failures_in_tar_files(ARGV)} failures in the given log files"
	require 'rubygems/package'
	require 'zlib'

	# This is intended as an example of how to use GzipReader and TarReader to process a
	# collection of gzipped tar files (.tgz files or .tar.gz files). Searching (as of Sept 2013)
	# for documentation of these libraries and examples of their use isn't terribly satisfactory,
	# so I thought I'd write up an example in the hopes that people find it helpful.

	# This assumes that a set of GZipped Tar files is provided as command line arguments,
	# and that these zipped tar files contain log data that includes (among other things)
	# information about some sort of failures (e.g., failed login attempts). Here we process
	# each file in each of the tar files, and count the number of lines that contain the
	# word "Failed", printing at the end the total number of such lines across all tar files.

	# Counts the lines of one archive entry that contain the word "Failed".
	#
	# file - any object responding to #read with no arguments (for example a
	#        Gem::Package::TarReader::Entry or an IO); the whole content is read.
	#
	# Returns the Integer number of matching lines, 0 when there is no content.
	def count_failures_in_file(file)
	  # Not every reader returns "" when there is nothing to read:
	  # Gem::Package::TarReader::Entry#read has returned nil for zero-length
	  # entries in some RubyGems versions. to_s turns nil into "" so an empty
	  # log file counts as zero failures instead of raising NoMethodError.
	  contents = file.read.to_s
	  # Block parameters are delimited by plain pipes; count with a block
	  # tallies the lines for which the match succeeds.
	  contents.each_line.count { |line| line =~ /Failed/ }
	end

	# Counts the "Failed" lines across every regular file in one gzipped tar archive.
	#
	# tgz_file_name - path to a .tgz / .tar.gz file.
	#
	# Returns the Integer total of count_failures_in_file over all file entries.
	# Nothing is rescued here, so errors raised while opening or reading the
	# archive propagate to the caller.
	def count_failures_in_single_tar_file(tgz_file_name)
	  count = 0
	  # The block forms close each reader when the block exits, including when an
	  # exception is raised part-way through the archive, so the underlying file
	  # handle is not leaked on failure.
	  Zlib::GzipReader.open(tgz_file_name) do |gzip_reader|
	    # The GzipReader yields the uncompressed tar stream; the TarReader walks
	    # that stream and hands back one Entry object per archive member.
	    Gem::Package::TarReader.new(gzip_reader) do |tar_reader|
	      tar_reader.each do |entry|
	        # Directories and other non-file members are skipped.
	        count += count_failures_in_file(entry) if entry.file?
	      end
	    end
	  end
	  # The tally lives in a local rather than relying on the return values of
	  # the block-form constructors.
	  count
	end

	# Totals the "Failed" line counts over a list of gzipped tar archives.
	#
	# tgz_file_names - a collection of archive paths (the script passes ARGV).
	#
	# Returns the sum of count_failures_in_single_tar_file over every path,
	# which is 0 when the collection is empty.
	def count_failures_in_tar_files(tgz_file_names)
	  # Block parameters are delimited by plain pipes; inject folds the
	  # per-archive counts into one running total starting from 0.
	  tgz_file_names.inject(0) do |running_total, archive_path|
	    running_total + count_failures_in_single_tar_file(archive_path)
	  end
	end

	# Script entry point: every command-line argument is passed along as an
	# archive path, and the combined "Failed" line count is printed.
	puts "There were #{count_failures_in_tar_files(ARGV)} failures in the given log files"