Skip to content

Instantly share code, notes, and snippets.

@shirosaki
Created August 29, 2012 15:30
Show Gist options
  • Save shirosaki/3514397 to your computer and use it in GitHub Desktop.
Save shirosaki/3514397 to your computer and use it in GitHub Desktop.
Benchmark: String#scan vs. String#unpack for measuring the byte lengths of characters in a UTF-8 string
$ ruby -v -Ku scan_unpack_banch.rb ~
ruby 1.8.7 (2012-02-08 patchlevel 358) [universal-darwin12.0]
Rehearsal -----------------------------------------------------
scan each len: 0.140000 0.000000 0.140000 ( 0.141777)
unpack each len: 0.070000 0.000000 0.070000 ( 0.064889)
scan total len: 0.110000 0.000000 0.110000 ( 0.108800)
unpack total len: 0.000000 0.000000 0.000000 ( 0.004272)
-------------------------------------------- total: 0.320000sec
user system total real
scan each len: 0.130000 0.000000 0.130000 ( 0.131575)
unpack each len: 0.070000 0.000000 0.070000 ( 0.065743)
scan total len: 0.100000 0.000000 0.100000 ( 0.099954)
unpack total len: 0.010000 0.000000 0.010000 ( 0.004284)
Results:
true
true
require 'benchmark'

# Benchmark comparing two ways of splitting a UTF-8 string into characters
# and measuring their encoded size in bytes:
#
#   * String#scan with a UTF-8 regexp, which yields one-character strings;
#   * String#unpack("U*"), which yields integer code points.
#
# Sizes are taken with String#bytesize and character arrays are re-assembled
# with Array#join rather than String#length / Array#to_s. On Ruby 1.8 those
# pairs behave the same, but on Ruby 1.9+ String#length counts characters and
# Array#to_s returns the inspect form, which made the two approaches disagree
# and the final checks print false.

# Number of repetitions, used both to build the sample string and as the
# iteration count of every benchmark report.
TIMES = 200

# One character for each of the 1-, 2-, 3- and 4-byte UTF-8 encodings.
string = "a¢あ𤭢" * TIMES

# Declared before the blocks so the results stay visible after the benchmark.
r1, r2, r3, r4 = nil, nil, nil, nil

Benchmark.bmbm do |x|
  # Byte length of every character, via regexp scan.
  x.report("scan each len:") do
    TIMES.times { r1 = string.scan(/./mu).map { |c| c.bytesize } }
  end

  # Byte length of every character, derived from its code point.
  x.report("unpack each len:") do
    TIMES.times do
      r2 = string.unpack("U*").map do |c|
        # From http://en.wikipedia.org/wiki/UTF-8#Description
        # (the original table, including the obsolete 5- and 6-byte forms).
        if c <= 0x7F
          1
        elsif c <= 0x7FF
          2
        elsif c <= 0xFFFF
          3
        elsif c <= 0x1FFFFF
          4
        elsif c <= 0x3FFFFFF
          5
        else
          6
        end
      end
    end
  end

  # Total byte length of the first three characters, via regexp scan.
  x.report("scan total len:") do
    TIMES.times { r3 = string.scan(/./mu)[0, 3].join.bytesize }
  end

  # Total byte length of the first three characters, via unpack/pack.
  x.report("unpack total len:") do
    TIMES.times { r4 = string.unpack("U*")[0, 3].pack("U*").bytesize }
  end
end

puts
puts "Results:"
# Both lines print true when the two approaches agree.
p r1 == r2
p r3 == r4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment