Skip to content

Instantly share code, notes, and snippets.

@asterite
Last active August 29, 2015 14:10
Show Gist options
  • Save asterite/d33c04087cdecf52d8ce to your computer and use it in GitHub Desktop.
Save asterite/d33c04087cdecf52d8ce to your computer and use it in GitHub Desktop.
require "iterator"
require "benchmark"
Benchmark.bm do |x|
x.report("Array#new") do
1_000.times { Array.new(100_000) { |i| i * 2 } }
end
x.report("times#map") do
1_000.times { 100_000.times.map { |i| i * 2 }.to_a }
end
end
# user system total real
# Array#new 0.150000 0.310000 0.460000 ( 0.265622)
# times#map 0.600000 0.860000 1.460000 ( 0.870755)
# Conclusion: Int#times#map#to_a is 3 times slower :-(
#
# This is because blocks in Crystal, when not captured, are always inlined into the call.
#
# So doing `Array.new(5) { ... }` will allocate an array of length 5 and fill it with the elements, without
# doing bounds check (we know the length), without a function pointer being created,
# and with the call being inlined in the caller. No extra memory allocations.
#
# With the iterator approach we have to allocate memory for the iterator (could be done with a struct to avoid
# this but because it's mutable it can lead to unexpected behaviour), then we allocate memory for the function
# pointer that represents the mapping function, then we iterate all of that blindly calling the functions, without
# being able to inline anything, and at the end we create an array of an unknown length (default = 4) and push
# elements to it, forced to do bounds checking when pushing the array (and regrowing the internal buffer).
#
# So... maybe we can have that method, but I'm affraid that if it becomes idiomatic then code will be slower
# for no real reason other than "it's cool that I can do this".
#
# On the other hand, premature optimization is the root of all evil, so these things could be used and later
# optimized if they turn out to be the bottleneck.
#
# Note that all of this does not apply to Ruby, because:
# * It's interpreted, so (probably) none of these optimizations apply.
# * Blocks (probably) are always represented as function pointers, or at least they can't be inlined.
#
# In my tests with Ruby 5.times.map { ... } is just a little slower (0.8s vs 0.6s in one test). Still, it's slower ;-)
#
# Finally, with a big array or range, doing multiple chained operations with iterators is much faster than
# doing this operations with intermediate arrays, so iterators are worthy... but not always.
require "iterator"
puts 5.times.map { |x| x * 2 }.to_a #=> [0, 2, 4, 6, 8]
class StopIteration < Exception
def initialize
super("StopIteration")
end
end
module Iterator(T)
include Enumerable(T)
def map(&func : T -> U)
MapIterator(typeof(self), T, U).new(self, func)
end
def select(&func : T -> B)
SelectIterator(typeof(self), T).new(self, func)
end
def reject(&func : T -> B)
RejectIterator(typeof(self), T).new(self, func)
end
def take(n)
TakeIterator(typeof(self), T).new(self, n)
end
def skip(n)
SkipIterator(typeof(self), T).new(self, n)
end
def zip(other : Iterator(U))
ZipIterator(typeof(self), typeof(other), T, U).new(self, other)
end
def each
while value = self.next
yield value
end
rescue StopIteration
end
end
class Array
def iterator
ArrayIterator.new(self)
end
end
class ArrayIterator(T)
include Iterator(T)
def initialize(@array : Array(T))
@index = 0
end
def next
if @index >= @array.length
raise StopIteration.new
end
value = @array.buffer[@index]
@index += 1
value
end
end
struct Range
def iterator
RangeIterator.new(self)
end
end
class RangeIterator(T)
include Iterator(T)
def initialize(@range : Range(T, T))
@current = range.begin
@reached_end = false
end
def next
if @reached_end
raise StopIteration.new
end
if @current == @range.end
@reached_end = true
if @range.excludes_end?
raise StopIteration.new
else
return @current
end
else
value = @current
@current = @current.succ
value
end
end
end
struct Int
def times
IntIterator.new(self)
end
end
class IntIterator(T)
include Iterator(T)
def initialize(@number : T)
@current = 0
end
def next
if @current >= @number
raise StopIteration.new
end
value = @current
@current += 1
value
end
end
struct MapIterator(I, T, U)
include Iterator(U)
def initialize(@iter : Iterator(T), @func : T -> U)
end
def next
@func.call(@iter.next)
end
end
struct SelectIterator(I, T)
include Iterator(T)
def initialize(@iter : Iterator(T), @func : T -> B)
end
def next
while true
value = @iter.next
if @func.call(value)
return value
end
end
end
end
struct RejectIterator(I, T)
include Iterator(T)
def initialize(@iter : Iterator(T), @func : T -> B)
end
def next
while true
value = @iter.next
unless @func.call(value)
return value
end
end
end
end
struct TakeIterator(I, T)
include Iterator(T)
def initialize(@iter : Iterator(T), @n : Int)
end
def next
if @n > 0
value = @iter.next
@n -= 1
value
else
raise StopIteration.new
end
end
end
struct SkipIterator(I, T)
include Iterator(T)
def initialize(@iter : Iterator(T), @n : Int)
end
def next
while @n > 0
@iter.next
@n -= 1
end
@iter.next
end
end
struct ZipIterator(I1, I2, T, U)
include Iterator({T, U})
def initialize(@iter1, @iter2)
end
def next
{@iter1.next, @iter2.next}
end
end
require "spec"
require "iterator"
describe Iterator do
describe "ArrayIterator" do
it "does next" do
a = [1, 2, 3]
iterator = a.iterator
iterator.next.should eq(1)
iterator.next.should eq(2)
iterator.next.should eq(3)
expect_raises StopIteration do
iterator.next
end
end
end
describe "RangeIterator" do
it "does next with inclusive range" do
a = 1..3
iterator = a.iterator
iterator.next.should eq(1)
iterator.next.should eq(2)
iterator.next.should eq(3)
expect_raises StopIteration do
iterator.next
end
end
it "does next with exclusive range" do
r = 1...3
iterator = r.iterator
iterator.next.should eq(1)
iterator.next.should eq(2)
expect_raises StopIteration do
iterator.next
end
end
end
describe "IntIterator" do
it "does next with times" do
n = 3
iterator = n.times
iterator.next.should eq(0)
iterator.next.should eq(1)
iterator.next.should eq(2)
expect_raises StopIteration do
iterator.next
end
end
end
describe "map" do
it "does map with Range iterator" do
(1..3).iterator.map { |x| x * 2 }.to_a.should eq([2, 4, 6])
end
end
describe "select" do
it "does select with Range iterator" do
(1..3).iterator.select { |x| x >= 2 }.to_a.should eq([2, 3])
end
end
describe "reject" do
it "does reject with Range iterator" do
(1..3).iterator.reject { |x| x >= 2 }.to_a.should eq([1])
end
end
describe "take" do
it "does take with Range iterator" do
(1..3).iterator.take(2).to_a.should eq([1, 2])
end
it "does take with more than available" do
(1..3).iterator.take(10).to_a.should eq([1, 2, 3])
end
end
describe "skip" do
it "does skip with Range iterator" do
(1..3).iterator.skip(2).to_a.should eq([3])
end
end
describe "zip" do
it "does skip with Range iterator" do
r1 = (1..3).iterator
r2 = (4..6).iterator
r1.zip(r2).to_a.should eq([{1, 4}, {2, 5}, {3, 6}])
end
end
it "combines many iterators" do
(1..100).iterator
.select { |x| 50 <= x < 60 }
.map { |x| x * 2 }
.take(3)
.to_a
.should eq([100, 102, 104])
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment