Skip to content

Instantly share code, notes, and snippets.

@sentientmonkey
Last active August 9, 2022 20:20
Show Gist options
  • Select an option

  • Save sentientmonkey/374dda01da8bc9afb799 to your computer and use it in GitHub Desktop.

Select an option

Save sentientmonkey/374dda01da8bc9afb799 to your computer and use it in GitHub Desktop.
Metaphone vs. Levenshtein shootout
#!/usr/bin/env ruby -w
require 'benchmark/bigo'
require 'rubyfish'
require_relative 'typo_suggestor'
require_relative 'levenshtien_suggestion'
require_relative 'metaphone_suggestion'
# Benchmark script: runs each suggestion strategy over TypoSuggestor fixtures
# built by the generator, then hands the JSON and HTML file names to
# data!/chart! (see the benchmark-bigo documentation for what those emit).
Benchmark.bigo do |bench|
  bench.increments = 6
  bench.generator { |size| TypoSuggestor.new(size) }
  bench.exponential 10

  # One report per strategy; the label is what shows up in the results.
  [
    ["levenshtien", LevenshteinSuggestion],
    ["metaphone", MetaphoneSuggestion]
  ].each do |label, strategy|
    bench.report label do |suggestor, _size|
      suggestor.suggest_typos do |words, typo|
        strategy.new(words).suggest(typo)
      end
    end
  end

  bench.data! 'chart_array_simple.json'
  bench.chart! 'chart_array_simple.html'
end
1 10 100 1000 10000 100000
levenshtien 39.683940568375625 171.1127078473862 1376.4100731580565 13581.684563405996 136372.4450114888 1357479.0120124817
metaphone 18.56271495457697 51.526448478866506 378.2996328835139 3744.1742086841964 38024.81372190674 390208.69731903076
require 'rubyfish'
require 'set'
# Suggests corrections for a typo by ranking candidate words by their
# Levenshtein edit distance to the typo (distance computed by RubyFish).
class LevenshteinSuggestion
  # Upper bound on the number of suggestions returned by #suggest.
  MAX_SUGGESTIONS = 5

  attr_reader :words

  # @param words [Array<String, nil>] candidate words to suggest from
  def initialize(words)
    @words = words
  end

  # Returns at most MAX_SUGGESTIONS words whose edit distance to +typo+ is
  # strictly below half the typo's length (integer division), closest first.
  #
  # @param typo [String] the misspelled word
  # @return [Array<String>] suggestions ordered by distance, then by word
  def suggest(typo)
    max = typo.length / 2

    # Drop nil and repeated entries up front so every distinct word is
    # measured exactly once.
    candidates = words.compact.uniq
    scored = candidates.map do |word|
      [word, RubyFish::Levenshtein.distance(typo, word)]
    end
    close = scored.select { |_word, distance| distance < max }

    # sort_by is not guaranteed to be stable, so the word itself is used as a
    # secondary key; otherwise which tied words make the cut is arbitrary.
    ranked = close.sort_by { |word, distance| [distance, word] }
    ranked.first(MAX_SUGGESTIONS).map(&:first)
  end
end
# Suggests corrections for a typo by looking up words that share a Double
# Metaphone phonetic code with it (codes computed by RubyFish).
class MetaphoneSuggestion
  # Index from phonetic code to the list of words that produced that code.
  attr_reader :metaphones

  # Builds the phonetic index. nil words and nil codes are skipped.
  #
  # @param words [Array<String, nil>] candidate words to index
  def initialize(words)
    @metaphones = {}
    words.each do |word|
      next unless word

      phonetic_codes(word).each do |code|
        (metaphones[code] ||= []) << word
      end
    end
  end

  # Returns every indexed word sharing at least one phonetic code with +typo+.
  # Each word appears once, even when it matches under more than one code.
  #
  # @param typo [String] the misspelled word
  # @return [Array<String>] matching words, in index order, without duplicates
  def suggest(typo)
    matches = phonetic_codes(typo).flat_map { |code| metaphones.fetch(code, []) }
    matches.uniq
  end

  private

  # Distinct non-nil phonetic codes for +text+. Collapsing identical codes
  # keeps a word from being listed twice under the same index key.
  def phonetic_codes(text)
    RubyFish::DoubleMetaphone.phonetic_code(text).compact.uniq
  end
end
# Benchmark fixture: a random sample of dictionary words plus a fixed set of
# typos and the corrections a suggestion strategy is expected to find.
class TypoSuggestor
  # Word list read when no other dictionary is given.
  DEFAULT_DICTIONARY = '/usr/share/dict/words'

  attr_reader :typos, :words, :corrections

  # @param size [Integer] number of dictionary words to sample
  # @param dictionary [String] path to a newline-separated word list
  def initialize(size, dictionary: DEFAULT_DICTIONARY)
    @typos = ["reils", "pwerline", "parte"]
    @corrections = ["rails", "powerline", "party"]

    # sample picks +size+ random entries without shuffling the whole list.
    sampled = File.readlines(dictionary).map(&:chomp).sample(size)

    # The corrections are always added so every typo has a findable answer.
    @words = (sampled + @corrections).uniq.sort
  end

  # Yields (words, typo) once per typo; the block must return a list of
  # suggestions. When the expected correction is missing from that list a
  # message is written to $stderr.
  #
  # @yieldparam words [Array<String>] the candidate word list
  # @yieldparam typo [String] the misspelling to correct
  # @yieldreturn [Array<String>] suggestions for the typo
  # @return [Array<String>] the typos that were checked
  def suggest_typos
    typos.zip(corrections).each do |typo, correction|
      suggestions = yield words, typo
      next if suggestions.include?(correction)

      $stderr.puts "#{correction} not found in #{suggestions.join(', ')}"
    end
    typos
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment