Skip to content

Instantly share code, notes, and snippets.

@maiha
Last active June 14, 2016 12:01
Show Gist options
  • Save maiha/e9fe33f2dbc54a9109d95db2ecd89749 to your computer and use it in GitHub Desktop.
Save maiha/e9fe33f2dbc54a9109d95db2ecd89749 to your computer and use it in GitHub Desktop.
crystal benchmark to create string
% crystal build --release bench.cr
% ./bench
"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127:0:0:1"
"127:0:0:1"
"127:0:0:1"
user system total real
class.format 0.340000 4.740000 5.080000 ( 3.177950)
method.format 0.100000 4.790000 4.890000 ( 3.113015)
class.string_plus 0.020000 2.360000 2.380000 ( 1.344859)
class.memory_io 0.020000 2.480000 2.500000 ( 1.436772)
class.string_build 0.020000 2.170000 2.190000 ( 1.336410)
class.string_inline 0.020000 1.560000 1.580000 ( 0.944456)
macro_string_inline 0.030000 1.420000 1.450000 ( 0.890514)
instance.to_s(io) 0.220000 2.560000 2.780000 ( 1.607884)
% grep CPU /proc/cpuinfo | head -1
model name : Intel(R) Core(TM) i7-4800MQ CPU @ 2.70GHz
% crystal -v
Crystal 0.17.4 [032ab91] (2016-05-26)
# Benchmark for converting IPv4 value(UInt32) to well-known string
# 2130706433.to_u32 => "127.0.0.1"
require "benchmark"
def format(v : UInt32)
"%s.%s.%s.%s" % [
(v & 0xff000000) >> 24,
(v & 0x00ff0000) >> 16,
(v & 0x0000ff00) >> 8,
(v & 0x000000ff),
]
end
class IpAddr
def self.format(v : UInt32)
"%s.%s.%s.%s" % [
(v & 0xff000000) >> 24,
(v & 0x00ff0000) >> 16,
(v & 0x0000ff00) >> 8,
(v & 0x000000ff),
]
end
def self.string_plus(v : UInt32)
((v & 0xff000000) >> 24).to_s + "." +
((v & 0x00ff0000) >> 16).to_s + "." +
((v & 0x0000ff00) >> 8).to_s + "." +
((v & 0x000000ff) ).to_s
end
def self.memory_io(v : UInt32)
io = MemoryIO.new
io << ((v & 0xff000000) >> 24).to_s
io << "."
io << ((v & 0x00ff0000) >> 16).to_s
io << "."
io << ((v & 0x0000ff00) >> 8).to_s
io << "."
io << ((v & 0x000000ff) ).to_s
String.new(io.to_slice)
end
def self.string_build(v : UInt32)
String.build do |io|
io << ((v & 0xff000000) >> 24).to_s
io << "."
io << ((v & 0x00ff0000) >> 16).to_s
io << "."
io << ((v & 0x0000ff00) >> 8).to_s
io << "."
io << ((v & 0x000000ff) ).to_s
end
end
def self.string_inline(v : UInt32)
"#{(v & 0xff000000) >> 24}:#{(v & 0x00ff0000) >> 16}:#{(v & 0x0000ff00) >> 8}:#{v & 0x000000ff}"
end
def initialize(@value : UInt32)
end
def to_s(io : IO)
io << self.class.string_inline(@value)
end
end
macro macro_string_inline(v)
"#{({{v}} & 0xff000000) >> 24}:#{({{v}} & 0x00ff0000) >> 16}:#{({{v}} & 0x0000ff00) >> 8}:#{{{v}} & 0x000000ff}"
end
### First, check implementations
v = 2130706433.to_u32
p IpAddr.format(v)
p format(v)
p IpAddr.string_plus(v)
p IpAddr.memory_io(v)
p IpAddr.string_build(v)
p IpAddr.string_inline(v)
p macro_string_inline(v)
p IpAddr.new(v).to_s
### Bench
n = 3_000_000
Benchmark.bm do |x|
x.report("class.format" ) { n.times{|i| IpAddr.format(i.to_u32) } }
x.report("method.format" ) { n.times{|i| format(i.to_u32) } }
x.report("class.string_plus" ) { n.times{|i| IpAddr.string_plus(i.to_u32) } }
x.report("class.memory_io" ) { n.times{|i| IpAddr.memory_io(i.to_u32) } }
x.report("class.string_build" ) { n.times{|i| IpAddr.string_build(i.to_u32) } }
x.report("class.string_inline") { n.times{|i| IpAddr.string_inline(i.to_u32) } }
x.report("macro_string_inline") { n.times{|i| macro_string_inline(i.to_u32) } }
x.report("instance.to_s(io)" ) { n.times{|i| IpAddr.new(i.to_u32).to_s } }
end
@asterite
Copy link

@maiha doing io << something.to_s is not the preferred way to do it, just do io << something. When you invoke to_s it creates a string in memory and then appends it to the IO. If you do io << something that something is written directly to the IO, without allocating an extra string.

Another thing: don't use array literal [...] in format, use a tuple literal: {...}. This, again, avoids memory allocations.

Then I found a performance bug in String#% that I already fixed in master ( crystal-lang/crystal@27c1a0f ) and will be available in 0.18.0.

With that, here's the changed script and my results:

require "benchmark"

def format(v : UInt32)
  "%s.%s.%s.%s" % {
    (v & 0xff000000) >> 24,
    (v & 0x00ff0000) >> 16,
    (v & 0x0000ff00) >> 8,
    (v & 0x000000ff),
  }
end

class IpAddr
  def self.format(v : UInt32)
    "%s.%s.%s.%s" % {
      (v & 0xff000000) >> 24,
      (v & 0x00ff0000) >> 16,
      (v & 0x0000ff00) >> 8,
      (v & 0x000000ff),
    }
  end

  def self.string_plus(v : UInt32)
    ((v & 0xff000000) >> 24).to_s + "." +
      ((v & 0x00ff0000) >> 16).to_s + "." +
      ((v & 0x0000ff00) >> 8).to_s + "." +
      ((v & 0x000000ff)).to_s
  end

  def self.memory_io(v : UInt32)
    io = MemoryIO.new
    io << ((v & 0xff000000) >> 24)
    io << "."
    io << ((v & 0x00ff0000) >> 16)
    io << "."
    io << ((v & 0x0000ff00) >> 8)
    io << "."
    io << ((v & 0x000000ff))
    String.new(io.to_slice)
  end

  def self.string_build(v : UInt32)
    String.build do |io|
      io << ((v & 0xff000000) >> 24)
      io << "."
      io << ((v & 0x00ff0000) >> 16)
      io << "."
      io << ((v & 0x0000ff00) >> 8)
      io << "."
      io << ((v & 0x000000ff))
    end
  end

  def self.string_inline(v : UInt32)
    "#{(v & 0xff000000) >> 24}:#{(v & 0x00ff0000) >> 16}:#{(v & 0x0000ff00) >> 8}:#{v & 0x000000ff}"
  end

  def initialize(@value : UInt32)
  end

  def to_s(io : IO)
    io << self.class.string_inline(@value)
  end
end

macro macro_string_inline(v)
  "#{({{v}} & 0xff000000) >> 24}:#{({{v}} & 0x00ff0000) >> 16}:#{({{v}} & 0x0000ff00) >> 8}:#{{{v}} & 0x000000ff}"
end

# ## First, check implementations
v = 2130706433.to_u32
p IpAddr.format(v)
p format(v)
p IpAddr.string_plus(v)
p IpAddr.memory_io(v)
p IpAddr.string_build(v)
p IpAddr.string_inline(v)
p macro_string_inline(v)
p IpAddr.new(v).to_s

# ## Bench
n = 3_000_000
Benchmark.bm do |x|
  x.report("class.format") { n.times { |i| IpAddr.format(i.to_u32) } }
  x.report("method.format") { n.times { |i| format(i.to_u32) } }
  x.report("class.string_plus") { n.times { |i| IpAddr.string_plus(i.to_u32) } }
  x.report("class.memory_io") { n.times { |i| IpAddr.memory_io(i.to_u32) } }
  x.report("class.string_build") { n.times { |i| IpAddr.string_build(i.to_u32) } }
  x.report("class.string_inline") { n.times { |i| IpAddr.string_inline(i.to_u32) } }
  x.report("macro_string_inline") { n.times { |i| macro_string_inline(i.to_u32) } }
  x.report("instance.to_s(io)") { n.times { |i| IpAddr.new(i.to_u32).to_s } }
end

Results:

"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127.0.0.1"
"127:0:0:1"
"127:0:0:1"
"127:0:0:1"
                          user     system      total        real
class.format          1.310000   0.960000   2.270000 (  1.707701)
method.format         1.320000   0.980000   2.300000 (  1.710724)
class.string_plus     1.140000   1.650000   2.790000 (  1.828637)
class.memory_io       0.870000   1.420000   2.290000 (  1.439918)
class.string_build    0.660000   0.910000   1.570000 (  1.033124)
class.string_inline   0.670000   0.880000   1.550000 (  1.043354)
macro_string_inline   0.670000   0.910000   1.580000 (  1.042234)
instance.to_s(io)     1.370000   1.830000   3.200000 (  2.102281)

The fastest is String.build and interpolation (class.string_inline), because they are basically the same (an interpolation generates a String.build). Format is still slower because the format string "%s.%s.%s.%s" needs to be parsed every time you format the string. But at least now it's just 150% slower, not 300% :-)

instance.to_s is also slow because an intermediate string is created. The best thing to do is just io << ... in to_s(io).

I plan to write a performance guide on the docs that explain how to write the most efficient code :-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment