Lazy enumerator helpers with Ruby
# Motivation:
#
# Lazy evaluation is how truly functional (not just 'functional style') programming languages
# achieve performance. Ruby is not there yet for several reasons, but at least some of them can be remedied
# with basic amendments to the standard library.
#
# Say you want to find, for instance, the first number n < 1000 for which the approximation (1+(1/n))^n
# is within a certain distance, say eps = 0.01, of e. If you just write
(1..1000).to_enum.map { |d| ((1+(1.0/d))**(d) - (Math::E)).abs }.find_index { |x| x < 0.01 }
# It works, and returns 134. But you calculated 1000-135=865 more values than you had to. With the code
# below, you can write
(1..1000).to_enum.lazy_map { |d| ((1+(1.0/d))**(d) - (Math::E)).abs }.find_index { |x| x < 0.01 }
# And it'll return the same result, but only run on the first 135 values.
# In the same way, there is no convenient way in Ruby to skip the first X entries of an enumerator. The most
# convenient way I know of is slicing, i.e. calling enum.to_a[X..-1]. However, this turns the result into
# an array, meaning the entire enumerator gets evaluated. The .skip implemented here achieves this skipping lazily.
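#
# A quick sketch of using .skip (assuming the Enumerator patch at the bottom of this file is loaded):
#
#   e = (1..1_000_000).to_enum
#   e.skip(5).first(3)   # => [6, 7, 8], without ever building the million-element array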
#
# There are better ways to estimate the error of the n'th term of this series, namely using a Taylor
# expansion. I just suck at these contrived programming examples, and that's what I came up with.
#
# Unfortunately, this currently comes at a cost. External iteration is way slower than Ruby's internal
# enumeration, and this only becomes worthwhile if you have a very expensive mapping over a large array -
# for instance, matching regexps.
# In this benchmark, I hide an 'a' among a huge array and look for it using regexps. There are obviously
# faster ways to do that; I'm just demonstrating where these lazy enums will be useful.
mtime = 0
100.times {
  e = ((1..100000).to_a << 'a').map { |i| i.to_s }.shuffle.to_enum
  dtime = Time.now
  e.lazy_map { |q| /\w+/.match(q) }.find { |q| not q.nil? }
  mtime += Time.now - dtime
}
mtime
# => 0.002666999999999999
mtime = 0
100.times {
  e = ((1..100000).to_a << 'a').map { |i| i.to_s }.shuffle.to_enum
  dtime = Time.now
  e.map { |q| /\w+/.match(q) }.find { |q| not q.nil? }
  mtime += Time.now - dtime
}
mtime
# => 13.525700999999994
# Whoa, about 5,000 times faster! You would expect it to be only twice as fast, because in expectation there
# are twice as many mapping operations in the non-lazy version. But apparently it doesn't scale linearly,
# because of the extra array copies made with map, compared to lazy_map, which just yields each value and
# discards it. So there you have it.
#
# Until Ruby 2.0 arrives, as far as I know you have to do these things yourself in order to really do lazy
# enumeration Ruby-style. I'd love to hear of any other way.
#
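# For reference, Ruby 2.0's planned Enumerable#lazy should let you write the first example above roughly as
# (a sketch only, since 2.0 isn't out yet):
#
#   (1..1000).lazy.map { |d| ((1+(1.0/d))**(d) - Math::E).abs }.find_index { |x| x < 0.01 }
#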
class Enumerator
  # Like #map, but returns a new Enumerator that computes each mapped value only when it is asked for,
  # instead of building the whole result array up front.
  def lazy_map(&block)
    Enumerator.new do |yielder|
      self.each do |value|
        yielder.yield(block.call(value))
      end
    end
  end

  # Yields the remaining values of this enumerator, one external #next at a time, until StopIteration
  # (which loop rescues for us).
  def iterate
    loop { yield self.next }
  end

  # Consumes the first n values without collecting them, then returns an enumerator over whatever is left.
  def skip(n)
    n.times { self.next }
    enum_for(:iterate)
  end
end
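
# A quick demonstration of the two helpers together (expected values, assuming the patch above is loaded):
#
#   squares = (1..1_000_000).to_enum.lazy_map { |n| n * n }
#   squares.skip(3).first(2)   # => [16, 25], computed lazily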