asterite · August 29, 2015 14:10
diff --git a/array_new_vs_int_times_map.cr b/array_new_vs_int_times_map.cr
 require "iterator"
 require "benchmark"

 Benchmark.bm do |x|
  x.report("Array#new") do
    1_000.times { Array.new(100_000) { |i| i * 2 } }
  end

  x.report("times#map") do
    1_000.times { 100_000.times.map { |i| i * 2 }.to_a }
  end
 end

 #                user     system      total        real
 # Array#new   0.150000   0.310000   0.460000 (  0.265622)
 # times#map   0.600000   0.860000   1.460000 (  0.870755)

 # Conclusion: Int#times#map#to_a is 3 times slower :-(
 #
 # This is because blocks in Crystal, when not captured, are always inlined into the call.
 #
 # So doing `Array.new(5) { ... }` will allocate an array of length 5 and fill it with the elements, without
 # doing bounds check (we know the length), without a function pointer being created,
 # and with the call being inlined in the caller. No extra memory allocations.
 #
 # With the iterator approach we have to allocate memory for the iterator (could be done with a struct to avoid
 # this but because it's mutable it can lead to unexpected behaviour), then we allocate memory for the function
 # pointer that represents the mapping function, then we iterate all of that blindly calling the functions, without
 # being able to inline anything, and at the end we create an array of an unknown length (default = 4) and push
 # elements to it, forced to do bounds checking when pushing the array (and regrowing the internal buffer).
 #
 # So... maybe we can have that method, but I'm affraid that if it becomes idiomatic then code will be slower
 # for no real reason other than "it's cool that I can do this".
 #
 # On the other hand, premature optimization is the root of all evil, so these things could be used and later
 # optimized if they turn out to be the bottleneck.
 #
 # Note that all of this does not apply to Ruby, because:
 #  * It's interpreted, so (probably) none of these optimizations apply.
 #  * Blocks (probably) are always represented as function pointers, or at least they can't be inlined.
 #
 # In my tests with Ruby 5.times.map { ... } is just a little slower (0.8s vs 0.6s in one test). Still, it's slower ;-)
 #
 # Finally, with a big array or range, doing multiple chained operations with iterators is much faster than
 # doing this operations with intermediate arrays, so iterators are worthy... but not always.
diff --git a/int_times_map.cr b/int_times_map.cr
 require "iterator"

 puts 5.times.map { |x| x * 2 }.to_a #=> [0, 2, 4, 6, 8]
diff --git a/iterator.cr b/iterator.cr
 class StopIteration < Exception
  def initialize
    super("StopIteration")
  end
 end

 module Iterator(T)
  include Enumerable(T)

  def map(&func : T -> U)
    MapIterator(typeof(self), T, U).new(self, func)
  end

  def select(&func : T -> B)
    SelectIterator(typeof(self), T).new(self, func)
  end

  def reject(&func : T -> B)
    RejectIterator(typeof(self), T).new(self, func)
  end

  def take(n)
    TakeIterator(typeof(self), T).new(self, n)
  end

  def skip(n)
    SkipIterator(typeof(self), T).new(self, n)
  end

  def zip(other : Iterator(U))
    ZipIterator(typeof(self), typeof(other), T, U).new(self, other)
  end

  def each
    while value = self.next
      yield value
    end
  rescue StopIteration
  end
 end

 class Array
  def iterator
    ArrayIterator.new(self)
  end
 end

 class ArrayIterator(T)
  include Iterator(T)

  def initialize(@array : Array(T))
    @index = 0
  end

  def next
    if @index >= @array.length
      raise StopIteration.new
    end

    value = @array.buffer[@index]
    @index += 1
    value
  end
 end

 struct Range
  def iterator
    RangeIterator.new(self)
  end
 end

 class RangeIterator(T)
  include Iterator(T)

  def initialize(@range : Range(T, T))
    @current = range.begin
    @reached_end = false
  end

  def next
    if @reached_end
      raise StopIteration.new
    end

    if @current == @range.end
      @reached_end = true

      if @range.excludes_end?
        raise StopIteration.new
      else
        return @current
      end
    else
      value = @current
      @current = @current.succ
      value
    end
  end
 end

 struct Int
  def times
    IntIterator.new(self)
  end
 end

 class IntIterator(T)
  include Iterator(T)

  def initialize(@number : T)
    @current = 0
  end

  def next
    if @current >= @number
      raise StopIteration.new
    end

    value = @current
    @current += 1
    value
  end
 end

 struct MapIterator(I, T, U)
  include Iterator(U)

  def initialize(@iter : Iterator(T), @func : T -> U)
  end

  def next
    @func.call(@iter.next)
  end
 end

 struct SelectIterator(I, T)
  include Iterator(T)

  def initialize(@iter : Iterator(T), @func : T -> B)
  end

  def next
    while true
      value = @iter.next
      if @func.call(value)
        return value
      end
    end
  end
 end

 struct RejectIterator(I, T)
  include Iterator(T)

  def initialize(@iter : Iterator(T), @func : T -> B)
  end

  def next
    while true
      value = @iter.next
      unless @func.call(value)
        return value
      end
    end
  end
 end

 struct TakeIterator(I, T)
  include Iterator(T)

  def initialize(@iter : Iterator(T), @n : Int)
  end

  def next
    if @n > 0
      value = @iter.next
      @n -= 1
      value
    else
      raise StopIteration.new
    end
  end
 end

 struct SkipIterator(I, T)
  include Iterator(T)

  def initialize(@iter : Iterator(T), @n : Int)
  end

  def next
    while @n > 0
      @iter.next
      @n -= 1
    end
    @iter.next
  end
 end

 struct ZipIterator(I1, I2, T, U)
  include Iterator({T, U})

  def initialize(@iter1, @iter2)
  end

  def next
    {@iter1.next, @iter2.next}
  end
 end
diff --git a/iterator_spec.cr b/iterator_spec.cr
 require "spec"
 require "iterator"

 describe Iterator do
  describe "ArrayIterator" do
    it "does next" do
      a = [1, 2, 3]
      iterator = a.iterator
      iterator.next.should eq(1)
      iterator.next.should eq(2)
      iterator.next.should eq(3)
      expect_raises StopIteration do
        iterator.next
      end
    end
  end

  describe "RangeIterator" do
    it "does next with inclusive range" do
      a = 1..3
      iterator = a.iterator
      iterator.next.should eq(1)
      iterator.next.should eq(2)
      iterator.next.should eq(3)
      expect_raises StopIteration do
        iterator.next
      end
    end

    it "does next with exclusive range" do
      r = 1...3
      iterator = r.iterator
      iterator.next.should eq(1)
      iterator.next.should eq(2)
      expect_raises StopIteration do
        iterator.next
      end
    end
  end

  describe "IntIterator" do
    it "does next with times" do
      n = 3
      iterator = n.times
      iterator.next.should eq(0)
      iterator.next.should eq(1)
      iterator.next.should eq(2)
      expect_raises StopIteration do
        iterator.next
      end
    end
  end

  describe "map" do
    it "does map with Range iterator" do
      (1..3).iterator.map { |x| x * 2 }.to_a.should eq([2, 4, 6])
    end
  end

  describe "select" do
    it "does select with Range iterator" do
      (1..3).iterator.select { |x| x >= 2 }.to_a.should eq([2, 3])
    end
  end

  describe "reject" do
    it "does reject with Range iterator" do
      (1..3).iterator.reject { |x| x >= 2 }.to_a.should eq([1])
    end
  end

  describe "take" do
    it "does take with Range iterator" do
      (1..3).iterator.take(2).to_a.should eq([1, 2])
    end

    it "does take with more than available" do
      (1..3).iterator.take(10).to_a.should eq([1, 2, 3])
    end
  end

  describe "skip" do
    it "does skip with Range iterator" do
      (1..3).iterator.skip(2).to_a.should eq([3])
    end
  end

  describe "zip" do
    it "does skip with Range iterator" do
      r1 = (1..3).iterator
      r2 = (4..6).iterator
      r1.zip(r2).to_a.should eq([{1, 4}, {2, 5}, {3, 6}])
    end
  end

  it "combines many iterators" do
    (1..100).iterator
            .select { |x| 50 <= x < 60 }
            .map { |x| x * 2 }
            .take(3)
            .to_a
            .should eq([100, 102, 104])
  end
 end
	require "iterator"
	require "benchmark"

	Benchmark.bm do \|x\|
	x.report("Array#new") do
	1_000.times { Array.new(100_000) { \|i\| i * 2 } }
	end

	x.report("times#map") do
	1_000.times { 100_000.times.map { \|i\| i * 2 }.to_a }
	end
	end

	# user system total real
	# Array#new 0.150000 0.310000 0.460000 ( 0.265622)
	# times#map 0.600000 0.860000 1.460000 ( 0.870755)

	# Conclusion: Int#times#map#to_a is 3 times slower :-(
	#
	# This is because blocks in Crystal, when not captured, are always inlined into the call.
	#
	# So doing `Array.new(5) { ... }` will allocate an array of length 5 and fill it with the elements, without
	# doing bounds check (we know the length), without a function pointer being created,
	# and with the call being inlined in the caller. No extra memory allocations.
	#
	# With the iterator approach we have to allocate memory for the iterator (could be done with a struct to avoid
	# this but because it's mutable it can lead to unexpected behaviour), then we allocate memory for the function
	# pointer that represents the mapping function, then we iterate all of that blindly calling the functions, without
	# being able to inline anything, and at the end we create an array of an unknown length (default = 4) and push
	# elements to it, forced to do bounds checking when pushing the array (and regrowing the internal buffer).
	#
	# So... maybe we can have that method, but I'm affraid that if it becomes idiomatic then code will be slower
	# for no real reason other than "it's cool that I can do this".
	#
	# On the other hand, premature optimization is the root of all evil, so these things could be used and later
	# optimized if they turn out to be the bottleneck.
	#
	# Note that all of this does not apply to Ruby, because:
	# * It's interpreted, so (probably) none of these optimizations apply.
	# * Blocks (probably) are always represented as function pointers, or at least they can't be inlined.
	#
	# In my tests with Ruby 5.times.map { ... } is just a little slower (0.8s vs 0.6s in one test). Still, it's slower ;-)
	#
	# Finally, with a big array or range, doing multiple chained operations with iterators is much faster than
	# doing this operations with intermediate arrays, so iterators are worthy... but not always.
	require "iterator"

	puts 5.times.map { \|x\| x * 2 }.to_a #=> [0, 2, 4, 6, 8]
	class StopIteration < Exception
	def initialize
	super("StopIteration")
	end
	end

	module Iterator(T)
	include Enumerable(T)

	def map(&func : T -> U)
	MapIterator(typeof(self), T, U).new(self, func)
	end

	def select(&func : T -> B)
	SelectIterator(typeof(self), T).new(self, func)
	end

	def reject(&func : T -> B)
	RejectIterator(typeof(self), T).new(self, func)
	end

	def take(n)
	TakeIterator(typeof(self), T).new(self, n)
	end

	def skip(n)
	SkipIterator(typeof(self), T).new(self, n)
	end

	def zip(other : Iterator(U))
	ZipIterator(typeof(self), typeof(other), T, U).new(self, other)
	end

	def each
	while value = self.next
	yield value
	end
	rescue StopIteration
	end
	end

	class Array
	def iterator
	ArrayIterator.new(self)
	end
	end

	class ArrayIterator(T)
	include Iterator(T)

	def initialize(@array : Array(T))
	@index = 0
	end

	def next
	if @index >= @array.length
	raise StopIteration.new
	end

	value = @array.buffer[@index]
	@index += 1
	value
	end
	end

	struct Range
	def iterator
	RangeIterator.new(self)
	end
	end

	class RangeIterator(T)
	include Iterator(T)

	def initialize(@range : Range(T, T))
	@current = range.begin
	@reached_end = false
	end

	def next
	if @reached_end
	raise StopIteration.new
	end

	if @current == @range.end
	@reached_end = true

	if @range.excludes_end?
	raise StopIteration.new
	else
	return @current
	end
	else
	value = @current
	@current = @current.succ
	value
	end
	end
	end

	struct Int
	def times
	IntIterator.new(self)
	end
	end

	class IntIterator(T)
	include Iterator(T)

	def initialize(@number : T)
	@current = 0
	end

	def next
	if @current >= @number
	raise StopIteration.new
	end

	value = @current
	@current += 1
	value
	end
	end

	struct MapIterator(I, T, U)
	include Iterator(U)

	def initialize(@iter : Iterator(T), @func : T -> U)
	end

	def next
	@func.call(@iter.next)
	end
	end

	struct SelectIterator(I, T)
	include Iterator(T)

	def initialize(@iter : Iterator(T), @func : T -> B)
	end

	def next
	while true
	value = @iter.next
	if @func.call(value)
	return value
	end
	end
	end
	end

	struct RejectIterator(I, T)
	include Iterator(T)

	def initialize(@iter : Iterator(T), @func : T -> B)
	end

	def next
	while true
	value = @iter.next
	unless @func.call(value)
	return value
	end
	end
	end
	end

	struct TakeIterator(I, T)
	include Iterator(T)

	def initialize(@iter : Iterator(T), @n : Int)
	end

	def next
	if @n > 0
	value = @iter.next
	@n -= 1
	value
	else
	raise StopIteration.new
	end
	end
	end

	struct SkipIterator(I, T)
	include Iterator(T)

	def initialize(@iter : Iterator(T), @n : Int)
	end

	def next
	while @n > 0
	@iter.next
	@n -= 1
	end
	@iter.next
	end
	end

	struct ZipIterator(I1, I2, T, U)
	include Iterator({T, U})

	def initialize(@iter1, @iter2)
	end

	def next
	{@iter1.next, @iter2.next}
	end
	end
	require "spec"
	require "iterator"

	describe Iterator do
	describe "ArrayIterator" do
	it "does next" do
	a = [1, 2, 3]
	iterator = a.iterator
	iterator.next.should eq(1)
	iterator.next.should eq(2)
	iterator.next.should eq(3)
	expect_raises StopIteration do
	iterator.next
	end
	end
	end

	describe "RangeIterator" do
	it "does next with inclusive range" do
	a = 1..3
	iterator = a.iterator
	iterator.next.should eq(1)
	iterator.next.should eq(2)
	iterator.next.should eq(3)
	expect_raises StopIteration do
	iterator.next
	end
	end

	it "does next with exclusive range" do
	r = 1...3
	iterator = r.iterator
	iterator.next.should eq(1)
	iterator.next.should eq(2)
	expect_raises StopIteration do
	iterator.next
	end
	end
	end

	describe "IntIterator" do
	it "does next with times" do
	n = 3
	iterator = n.times
	iterator.next.should eq(0)
	iterator.next.should eq(1)
	iterator.next.should eq(2)
	expect_raises StopIteration do
	iterator.next
	end
	end
	end

	describe "map" do
	it "does map with Range iterator" do
	(1..3).iterator.map { \|x\| x * 2 }.to_a.should eq([2, 4, 6])
	end
	end

	describe "select" do
	it "does select with Range iterator" do
	(1..3).iterator.select { \|x\| x >= 2 }.to_a.should eq([2, 3])
	end
	end

	describe "reject" do
	it "does reject with Range iterator" do
	(1..3).iterator.reject { \|x\| x >= 2 }.to_a.should eq([1])
	end
	end

	describe "take" do
	it "does take with Range iterator" do
	(1..3).iterator.take(2).to_a.should eq([1, 2])
	end

	it "does take with more than available" do
	(1..3).iterator.take(10).to_a.should eq([1, 2, 3])
	end
	end

	describe "skip" do
	it "does skip with Range iterator" do
	(1..3).iterator.skip(2).to_a.should eq([3])
	end
	end

	describe "zip" do
	it "does skip with Range iterator" do
	r1 = (1..3).iterator
	r2 = (4..6).iterator
	r1.zip(r2).to_a.should eq([{1, 4}, {2, 5}, {3, 6}])
	end
	end

	it "combines many iterators" do
	(1..100).iterator
	.select { \|x\| 50 <= x < 60 }
	.map { \|x\| x * 2 }
	.take(3)
	.to_a
	.should eq([100, 102, 104])
	end
	end