Skip to content

Instantly share code, notes, and snippets.

@nouse
Created September 26, 2014 14:29
Show Gist options
  • Save nouse/ecde2662b6ac188cea66 to your computer and use it in GitHub Desktop.
Save nouse/ecde2662b6ac188cea66 to your computer and use it in GitHub Desktop.
module BM
def self.search(haystack, needle)
Pattern.new(needle).search(haystack)
end
class Pattern < BasicObject
def initialize(needle)
@needle = needle
@length = @needle.length
generate_good_suffices
generate_bad_characters
end
def search(haystack)
haystack_len = haystack.size
return nil if haystack_len == 0
return haystack if @length == 0
s = 0
while s <= haystack_len - @length
j = @length
while (j > 0) && @needle[j-1] == haystack[s+j-1]
j -= 1
end
if(j > 0)
delta1 = @bad[haystack[s+j-1]]
delta2 = @good[j-1] + @length - j
s += [delta1, delta2].max
else
return s
end
end
return nil
end
private
def generate_suffices
@suff = ::Array.new(@length, 0)
g = @length - 1
f = 0
@suff[@length-1] = @length
(@length-2).downto(0).each do |i|
if i > g && @suff[i+@length-1-f] < i-g
@suff[i] = @suff[i+@length-1-f]
else
if (i < g)
g = i
end
f = i
while (g >= 0 && @needle[g] == @needle[g + @length - 1 - f])
g -= 1
end
@suff[i] = f - g;
end
end
end
def generate_good_suffices
generate_suffices
@good = ::Array.new(@length, @length)
(@length-1).downto(0).each do |i|
if @suff[i] == i + 1
0.upto(@length-2-i).each do |j|
@good[j] = @length - 1 - i
end
end
end
(0..@length-2).each do |i|
@good[@length-1-@suff[i]] = @length - 1 - i
end
@good
end
def generate_bad_characters
@bad = ::Hash.new(@length)
@needle.reverse.each_char.each_with_index do |c, i|
next if i == 0
@bad[c] = i
end
end
end
end
file = IO.read 'git-config.1'
require 'strscan'
require 'benchmark'
s = StringScanner.new(file)
pattern = BM::Pattern.new("EXAMPLE")
Benchmark.benchmark "search" do |x|
x.report "regexp" do
1000.times{ file =~ /EXAMPLE/ }
end
x.report "index" do
1000.times{ file.index("EXAMPLE") }
end
x.report "scan" do
1000.times{ s.scan_until(/EXAMPLE/) }
end
x.report "bm" do
1000.times{ pattern.search(file) }
end
end
# result
# regexp 0.010000 0.000000 0.010000 ( 0.001183)
# index 0.000000 0.000000 0.000000 ( 0.001868)
# scan 0.040000 0.000000 0.040000 ( 0.038135)
# bm 0.410000 0.000000 0.410000 ( 0.410668)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment