Skip to content

Instantly share code, notes, and snippets.

@gonzedge
Last active January 23, 2017 02:05
Show Gist options
  • Save gonzedge/dec2c31335342dba02a77141fa8184fb to your computer and use it in GitHub Desktop.
Save gonzedge/dec2c31335342dba02a77141fa8184fb to your computer and use it in GitHub Desktop.
Benchmark comparing fast_trie, triez and rambling-trie for a match prefix example
#!/usr/bin/env ruby
# From https://stackoverflow.com/questions/41521492/faster-way-to-see-if-a-huge-list-of-strings-is-contained-within-another-string/41523304#41523304
# with slight changes
require 'triez'
require 'trie'
require 'rambling-trie'
require 'benchmark'
DICT = '/usr/share/dict/web2'
triez = Triez.new value_type: :object, default: nil
trie = Trie.new
r_trie = Rambling::Trie.create
r_c_trie = Rambling::Trie.create
count = 0
File.foreach(DICT) do |word|
word.chomp!
if word.size > 4
triez[word] = word
r_trie << word
r_c_trie << word
count += 1
end
end
r_c_trie.compress!
puts "word count: #{count}"
def in_trie?(str, trie)
0.upto(str.length - 1) do |i|
node = trie.root
i.upto(str.length - 1) do |j|
break unless node.walk! str[j]
if node.terminal?
return str[i..j]
end
end
end
nil
end
def in_triez?(str, triez)
triez.change_all(:substring, str) do |v|
return v if v
end
nil
end
Benchmark.bm(12) do |b|
b.report('trie') do
100_000.times { in_trie?('ifdxawesome45someword3', trie) }
end
b.report('triez') do
100_000.times { in_triez?('ifdxawesome45someword3', triez) }
end
b.report('r_trie') do
100_000.times { r_trie.words_within? 'ifdxawesome45someword3' }
end
b.report('r_trie c') do
100_000.times { r_c_trie.words_within? 'ifdxawesome45someword3' }
end
b.report('trie tail') do
100_000.times { in_trie?('ifdx45someword3awesome', trie) }
end
b.report('triez tail') do
100_000.times { in_triez?('ifdx45someword3awesome', triez) }
end
b.report('r_trie tail') do
100_000.times { r_trie.words_within? 'ifdx45someword3awesome' }
end
b.report('r_trie tail c') do
100_000.times { r_c_trie.words_within? 'ifdx45someword3awesome' }
end
end
word count: 228982
user system total real
trie 4.620000 0.080000 4.700000 ( 4.894384)
triez 1.850000 0.030000 1.880000 ( 1.932774)
r_trie 2.560000 0.030000 2.590000 ( 2.669277)
r_trie c 5.250000 0.080000 5.330000 ( 6.830555)
trie tail 4.540000 0.100000 4.640000 ( 4.987587)
triez tail 4.810000 0.060000 4.870000 ( 5.028469)
r_trie tail 5.420000 0.100000 5.520000 ( 6.135820)
r_trie tail c 11.870000 0.100000 11.970000 ( 12.246177)
gem install fast_trie triez rambling-trie
chmod +x ./match_prefix_benchmark.rb
./match_prefix_benchmark.rb
@gonzedge
Copy link
Author

This benchmark has been run with:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment