Created
February 7, 2015 14:59
-
-
Save ccocchi/f8d58626a9ac1b9536e5 to your computer and use it in GitHub Desktop.
Profanity filter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark/ips' | |
# Sample text that every benchmark strategy scans.
text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ullamcorper eleifend velit sed placerat. In libero leo, fringilla ut mauris nec, bibendum varius nisi. Nam rhoncus facilisis lacinia. Integer massa quam, vestibulum eget est sed, elementum vulputate nibh. Mauris fermentum tellus eu commodo blandit. Cras ornare, risus id semper lacinia, est urna maximus dolor, aliquam maximus metus sem a velit. Suspendisse vel libero eu leo volutpat eleifend accumsan nec massa. Integer eu vulputate quam, ac tristique arcu. Fusce et justo vitae nulla maximus lacinia. Integer dolor massa, finibus vel mattis a, porta eu erat. Suspendisse potenti. Sed ex magna, imperdiet sit amet augue ac, vestibulum vehicula mi. Praesent sapien neque, bibendum eget lorem fermentum, molestie semper leo.'

# Words looked for in the first run.
words_10 = %w[foo bar string amp baguette marine grenadine quezac bambou panda]
# The same ten words repeated ten times: 100 entries, only 10 distinct.
words_100 = words_10 * 10

# Single-regexp strategy: one case-insensitive alternation of all the words.
# The alternation is wrapped in \b...\b so it matches whole words only, like
# the per-word regexps below and the split-based array strategies; without
# the boundaries it would also match substrings and the strategies would not
# be comparable. Regexp.union escapes any regexp metacharacters in the words.
regexp_10 = /\b(?:#{Regexp.union(words_10).source})\b/i
regexp_100 = /\b(?:#{Regexp.union(words_100).source})\b/i

# Regexp-loop strategy: one whole-word, case-insensitive regexp per word.
r_array_10 = words_10.map { |w| /\b#{Regexp.escape(w)}\b/i }
r_array_100 = words_100.map { |w| /\b#{Regexp.escape(w)}\b/i }
puts 'Worst case: no element found'
puts "Text size: #{text.size}"

Benchmark.ips do |bm|
  # One combined regexp matched against the whole text.
  bm.report('regexp_10') { !(regexp_10 =~ text).nil? }
  bm.report('regexp_100') { !(regexp_100 =~ text).nil? }

  # One regexp per word; stop at the first regexp that matches.
  bm.report('regexp_loop_10') do
    r_array_10.each { |pattern| break(true) if pattern =~ text }
    false
  end
  bm.report('regexp_loop_100') do
    r_array_100.each { |pattern| break(true) if pattern =~ text }
    false
  end

  # Tokenise the text, downcase each token in place, then intersect the
  # word list with the tokens.
  bm.report('array#&_10') do
    tokens = text.split(/\W+/).map! do |token|
      token.downcase!
      token
    end
    (words_10 & tokens).empty?
  end
  bm.report('array#&_100') do
    tokens = text.split(/\W+/).map! do |token|
      token.downcase!
      token
    end
    (words_100 & tokens).empty?
  end

  # Tokenise the text and stop at the first token found in the word list.
  bm.report('array_loop_10') do
    text.split(/\W+/).each do |token|
      token.downcase!
      break(true) if words_10.include?(token)
    end
    false
  end
  bm.report('array_loop_100') do
    text.split(/\W+/).each do |token|
      token.downcase!
      break(true) if words_100.include?(token)
    end
    false
  end
end
puts

# Words looked for in the second run. The first entry, "integer", occurs in
# the sample text (as "Integer").
words_10 = %w[integer bar string amp baguette marine grenadine quezac bambou panda]
# The same ten words repeated ten times: 100 entries, only 10 distinct.
words_100 = words_10 * 10

# Single-regexp strategy: one case-insensitive alternation of all the words.
# The alternation is wrapped in \b...\b so it matches whole words only, like
# the per-word regexps below and the split-based array strategies; without
# the boundaries it would also match substrings and the strategies would not
# be comparable. Regexp.union escapes any regexp metacharacters in the words.
regexp_10 = /\b(?:#{Regexp.union(words_10).source})\b/i
regexp_100 = /\b(?:#{Regexp.union(words_100).source})\b/i

# Regexp-loop strategy: one whole-word, case-insensitive regexp per word.
r_array_10 = words_10.map { |w| /\b#{Regexp.escape(w)}\b/i }
r_array_100 = words_100.map { |w| /\b#{Regexp.escape(w)}\b/i }
puts 'Best case: first element match'
puts "Text size: #{text.size}"

Benchmark.ips do |bm|
  # One combined regexp matched against the whole text.
  bm.report('regexp_10') { !(regexp_10 =~ text).nil? }
  bm.report('regexp_100') { !(regexp_100 =~ text).nil? }

  # One regexp per word; stop at the first regexp that matches.
  bm.report('regexp_loop_10') do
    r_array_10.each { |pattern| break(true) if pattern =~ text }
    false
  end
  bm.report('regexp_loop_100') do
    r_array_100.each { |pattern| break(true) if pattern =~ text }
    false
  end

  # Tokenise the text, downcase each token in place, then intersect the
  # word list with the tokens.
  bm.report('array#&_10') do
    tokens = text.split(/\W+/).map! do |token|
      token.downcase!
      token
    end
    (words_10 & tokens).empty?
  end
  bm.report('array#&_100') do
    tokens = text.split(/\W+/).map! do |token|
      token.downcase!
      token
    end
    (words_100 & tokens).empty?
  end

  # Tokenise the text and stop at the first token found in the word list.
  bm.report('array_loop_10') do
    text.split(/\W+/).each do |token|
      token.downcase!
      break(true) if words_10.include?(token)
    end
    false
  end
  bm.report('array_loop_100') do
    text.split(/\W+/).each do |token|
      token.downcase!
      break(true) if words_100.include?(token)
    end
    false
  end
end
# Benchmark.compare *result
Author
ccocchi
commented
Feb 7, 2015
To make the two regexp approaches equivalent, change
r_array_10 = words_10.map { |w| /\b#{w}\b/i }
r_array_100 = words_100.map { |w| /\b#{w}\b/i }
to
r_array_10 = words_10.map { |w| /#{w}/i }
r_array_100 = words_100.map { |w| /#{w}/i }
Results with that change:
Worst case: no element found
Text size: 789
Calculating -------------------------------------
regexp_10 1.608k i/100ms
regexp_100 216.000 i/100ms
regexp_loop_10 3.939k i/100ms
regexp_loop_100 400.000 i/100ms
array#&_10 869.000 i/100ms
array#&_100 785.000 i/100ms
array_loop_10 752.000 i/100ms
array_loop_100 163.000 i/100ms
-------------------------------------------------
regexp_10 16.428k (± 4.2%) i/s - 83.616k
regexp_100 2.149k (± 5.3%) i/s - 10.800k
regexp_loop_10 40.490k (± 4.3%) i/s - 204.828k
regexp_loop_100 4.011k (± 4.8%) i/s - 20.400k
array#&_10 8.826k (± 4.7%) i/s - 44.319k
array#&_100 8.102k (± 4.2%) i/s - 40.820k
array_loop_10 7.459k (± 4.2%) i/s - 37.600k
array_loop_100 1.633k (± 4.2%) i/s - 8.150k

Best case: first element match
Text size: 789
Calculating -------------------------------------
regexp_10 4.982k i/100ms
regexp_100 668.000 i/100ms
regexp_loop_10 63.389k i/100ms
regexp_loop_100 64.403k i/100ms
array#&_10 879.000 i/100ms
array#&_100 794.000 i/100ms
array_loop_10 1.282k i/100ms
array_loop_100 507.000 i/100ms
-------------------------------------------------
regexp_10 50.999k (± 3.9%) i/s - 259.064k
regexp_100 6.781k (± 5.1%) i/s - 34.068k
regexp_loop_10 1.139M (± 5.2%) i/s - 5.705M
regexp_loop_100 1.133M (± 6.3%) i/s - 5.667M
array#&_10 8.770k (± 4.0%) i/s - 43.950k
array#&_100 8.210k (± 4.4%) i/s - 41.288k
array_loop_10 13.040k (± 4.3%) i/s - 65.382k
array_loop_100 5.152k (± 5.5%) i/s - 25.857k
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment