Created
April 18, 2019 11:50
-
-
Save stulentsev/ba8f09318d4f683925584c84245f2316 to your computer and use it in GitHub Desktop.
spam filter: proof of concept
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample words fed to both detectors. The first four are single-script or
# contain digits; the last two appear to mix Cyrillic and Latin letters inside
# one word (look-alike substitution) -- confirm the exact code points in an
# editor that reveals them, since homoglyphs are indistinguishable by eye.
tests = %w[
  лошадь
  horse
  horse123
  h0rse
  лошадb
  slоn
]
# Returns the elements of +tests+ matched by a single lookahead-based regex.
#
# The pattern succeeds at a word boundary from which a run of letters reaches
# both an ASCII letter ([a-zA-Z]) and a letter that is NOT ASCII
# ([[:alpha:]&&[^a-zA-Z]]), i.e. a letter run that mixes the two groups.
#
# @param tests [Enumerable<String>] candidate words
# @return [Array<String>] the words the pattern matches, in input order
def complicated(tests)
  # Two lookaheads anchored at the same position: the first demands an ASCII
  # letter somewhere in the letter run, the second a non-ASCII letter. Both are
  # zero-width, so the trailing \b is tested at that same position.
  @character_set_union_intersection_rgx ||= %r{\b(?=[[:alpha:]]*[a-zA-Z])(?=[[:alpha:]]*[[:alpha:]&&[^a-zA-Z]])\b}
  tests.grep(@character_set_union_intersection_rgx)
end
# Returns the elements of +tests+ that consist only of letters and contain
# both ASCII letters and non-ASCII letters.
#
# Step-by-step variant: first require the whole string to be letters, then
# strip the ASCII letters and compare sizes.
#
# @param tests [Enumerable<String>] candidate words
# @return [Array<String>] the selected words, in input order
def naive(tests)
  @all_letters_rgx ||= /\A[[:alpha:]]+\z/
  @latin_letters_rgx ||= /[a-zA-Z]/
  tests.select do |w|
    # Strings with any non-letter fall through: the `if` yields nil, which
    # `select` treats as "reject".
    if w =~ @all_letters_rgx
      # What is left after removing every ASCII letter.
      fs = w.gsub(@latin_letters_rgx, '')
      # Non-empty remainder => has non-ASCII letters; shorter than the
      # original => had ASCII letters too.
      fs.size != 0 && fs.size != w.size
    end
  end
end
require 'benchmark/ips'

# Measure both detectors on the same sample list and print a comparison of
# their iterations per second.
Benchmark.ips do |x|
  x.report("naive") do
    naive(tests)
  end
  x.report("complicated") do
    complicated(tests)
  end
  x.compare!
end
# >> Warming up --------------------------------------
# >> naive 8.558k i/100ms
# >> complicated 19.474k i/100ms
# >> Calculating -------------------------------------
# >> naive 88.259k (± 2.0%) i/s - 445.016k in 5.044214s
# >> complicated 204.001k (± 4.0%) i/s - 1.032M in 5.067678s
# >>
# >> Comparison:
# >> complicated: 204001.4 i/s
# >> naive: 88259.1 i/s - 2.31x slower
# >>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment