weedySeaDragon · October 11, 2017 23:00
diff --git a/ruby_read_file_in_chunks.rb b/ruby_read_file_in_chunks.rb
 # (still working on the comments / descriptions for this)

 # When you don't want to read an entire file into memory at once, you can use Enumerators to read it in 1 part at a time.
 #
 # An Enumerator is really like someone that keeps track of _where you are_ in a collection of things,
 # and knows how to do a few things related to the current location, like:
 #    - give me one thing at a time, a.k.a.  ".each"
 #    - go to the next thing, a.k.a.  ".next"
 #    - get the next thing, but don't actually move your location forward to it, a.k.a. ".peek"
 #    - tell me how many things there are, a.k.a. ".size"
 #    - go back to the start, a.k.a. ".rewind"
 #   and other cool stuff.

 # There is a very cool subclass of Enumerator that will _only work with a certain number of things at a time._
 # It is class Lazy < Enumerator  ( in the enumerator.rb file)
 #
 # Since it only works with a certain number of things at a time from your collection,
 # it means you can _read from an infinite collection!_  Yes!  it's true!
 #
 #
 # Among the methods it has are:
 #
 #

    # Return the number of lines in the file that match a pattern.
    # Don't read the entire file into memory; read it only (num_lines_per_batch) lines at a time.
    #  @url https://stackoverflow.com/questions/2962134/ruby-read-file-in-batches/41069373#41069373
    #
    # @param fname [String] path of the file to scan
    # @param match_regexp [Regexp, String] pattern each line is tested against
    # @param num_lines_per_batch [Integer] maximum number of lines held in memory at once
    # @return [Integer] how many lines matched (0 for an empty file)
    # @raise [ArgumentError] if num_lines_per_batch is not positive (raised by each_slice)
    def num_matches_in_file(fname, match_regexp, num_lines_per_batch: 5000)
      num_matched = 0

      File.open(fname, "r") do |f|
        # An IO enumerates its lines, and each_slice with a block only pulls
        # (num_lines_per_batch) of them before yielding, so no extra lazy
        # wrapper is needed to keep memory bounded.
        f.each_slice(num_lines_per_batch) do |lines|
          # count with a block tallies directly instead of building a throwaway
          # array of matching lines; match? skips allocating MatchData.
          num_matched += lines.count { |line| line.match?(match_regexp) }
        end
      end

      num_matched
    end
	# (still working on the comments / descriptions for this)

	# When you don't want to read an entire file into memory at once, you can use Enumerators to read it in 1 part at a time.
	#
	# An Enumerator is really like someone that keeps track of _where you are_ in a collection of things,
	# and knows how to do a few things related to the current location, like:
	# - give me one thing at a time, a.k.a. ".each"
	# - go to the next thing, a.k.a. ".next"
	# - get the next thing, but don't actually move your location forward to it, a.k.a. ".peek"
	# - tell me how many things there are, a.k.a. ".size"
	# - go back to the start, a.k.a. ".rewind"
	# and other cool stuff.

	# There is a very cool subclass of Enumerator that will _only work with a certain number of things at a time._
	# It is class Lazy < Enumerator ( in the enumerator.rb file)
	#
	# Since it only works with a certain number of things at a time from your collection,
	# it means you can _read from an infinite collection!_ Yes! it's true!
	#
	#
	# Among the methods it has are:
	#
	#

	# Return the number of lines in the file that match a pattern.
	# Don't read the entire file into memory; read it only (num_lines_per_batch) lines at a time.
	# @url https://stackoverflow.com/questions/2962134/ruby-read-file-in-batches/41069373#41069373
	#
	# @param fname [String] path of the file to scan
	# @param match_regexp [Regexp, String] pattern each line is tested against
	# @param num_lines_per_batch [Integer] maximum number of lines held in memory at once
	# @return [Integer] how many lines matched (0 for an empty file)
	# @raise [ArgumentError] if num_lines_per_batch is not positive (raised by each_slice)
	def num_matches_in_file(fname, match_regexp, num_lines_per_batch: 5000)
	  num_matched = 0

	  File.open(fname, "r") do |f|
	    # An IO enumerates its lines, and each_slice with a block only pulls
	    # (num_lines_per_batch) of them before yielding, so memory stays bounded.
	    f.each_slice(num_lines_per_batch) do |lines|
	      # Tally matches directly; match? avoids allocating MatchData per line.
	      num_matched += lines.count { |line| line.match?(match_regexp) }
	    end
	  end

	  num_matched
	end