Skip to content

Instantly share code, notes, and snippets.

@melborne
Created March 26, 2009 08:15
Show Gist options
  • Select an option

  • Save melborne/85967 to your computer and use it in GitHub Desktop.

Select an option

Save melborne/85967 to your computer and use it in GitHub Desktop.
# Single-character edit helpers used to generate spelling candidates.
# None of these methods modify the receiver; each returns a new String.
class String
  # Returns a copy with the character at index +i+ removed.
  # +i+ must satisfy 0 <= i < length.
  def drop_char(i)
    self[0...i] + self[i + 1..-1]
  end

  # Returns a copy with the characters at +i+ and +i+1+ swapped.
  # +i+ must satisfy 0 <= i < length - 1.
  def trans_char(i)
    self[0...i] + self[i + 1] + self[i] + self[i + 2..-1]
  end

  # Returns a copy with the character at index +i+ replaced by +char+.
  # +i+ must satisfy 0 <= i < length.
  def alt_char(i, char)
    drop_char(i).insert(i, char)
  end

  # Returns a copy with +char+ inserted before index +i+ (0 <= i <= length).
  def insert_char(i, char)
    # dup (not clone) so the copy is never frozen: clone keeps the frozen
    # state and insert would then raise on frozen receivers.
    dup.insert(i, char)
  end
end
# Splits +text+ into an Array of lowercase words, where a word is a
# maximal run of the letters a-z after downcasing.
def words(text)
  lowered = text.downcase
  lowered.scan(/[a-z]+/)
end
# Builds a frequency table from +features+.
# The returned Hash has a default of 1, so an unseen key reads as 1 and
# every occurrence of a key adds 1 on top of that default.
def train(features)
  features.each_with_object(Hash.new(1)) { |feature, counts| counts[feature] += 1 }
end
# Word-frequency model, built once at load time from the corpus file
# 'big.txt' (resolved relative to the current working directory).
# File.read closes the file as soon as its contents are read, whereas
# open(...).read leaves the handle open until it is garbage-collected.
NWORDS = train(words(File.read('big.txt')))
# Generates every string one edit away from +word+.
# For each position, in order: the deletion, the adjacent transposition,
# the 26 single-letter replacements, and the 26 single-letter insertions
# (insertions are also produced at the position just past the end).
# Duplicates are kept. Returns nil only if no candidate was produced.
def edits1(word)
  length = word.length
  letters = ('a'..'z').to_a

  candidates = (0..length).flat_map do |pos|
    at_pos = []
    at_pos << word.drop_char(pos) if pos < length
    at_pos << word.trans_char(pos) if pos < length - 1
    at_pos.concat(letters.map { |ch| word.alt_char(pos, ch) }) if pos < length
    at_pos.concat(letters.map { |ch| word.insert_char(pos, ch) })
  end

  candidates unless candidates.empty?
end
# Returns the distinct strings two edits away from +word+ that are keys of
# NWORDS, in first-seen order, or nil when there are none.
def known_edits2(word)
  # Filter with key? instead of intersecting with NWORDS.keys: that avoids
  # building an array of the whole dictionary on every call, and key? does
  # not trigger the Hash default.
  found = edits1(word)
          .flat_map { |first_edit| edits1(first_edit) }
          .select { |second_edit| NWORDS.key?(second_edit) }
          .uniq
  found.empty? ? nil : found
end
# Returns the distinct members of +words+ that are keys of NWORDS, in their
# original order, or nil when none of them are.
def known(words)
  # key? lookups replace `words & NWORDS.keys`, which materialised the full
  # key list of the dictionary on every call.
  found = words.select { |candidate| NWORDS.key?(candidate) }.uniq
  found.empty? ? nil : found
end
# Returns the most frequent candidate correction for +word+ according to
# NWORDS. Candidate pools are tried in order and the first non-nil one wins:
# the word itself if known, known words one edit away, known words two
# edits away, and finally the unchanged word.
def correct(word)
  pool = known([word])
  pool ||= known(edits1(word))
  pool ||= known_edits2(word)
  pool ||= [word]
  pool.max_by { |candidate| NWORDS[candidate] }
end
# Runs +correct+ over a set of misspellings and summarises the results.
#
# @param tests [Hash] maps each correctly spelled target (Symbol or String)
#   to a space-separated String of misspellings of that target
# @param bias [Integer, false] when truthy, added to the NWORDS count of
#   every target before testing; this permanently mutates NWORDS
# @param verbose [Boolean] when truthy, prints one line per failed correction
# @return [Hash] Symbol keys :bad, :n, :bias, :pct, :unknown, :secs; every
#   value is a String. :pct is the integer percentage of words corrected to
#   their target, reported as "0" when there were no test words at all.
def spelltest(tests, bias=false, verbose=false)
  n = 0
  bad = 0
  unknown = 0
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Targets may be Symbols while the words looked up in NWORDS are Strings,
  # so convert before biasing; otherwise the bias lands on a key that is
  # never consulted.
  tests.each_key { |target| NWORDS[target.to_s] += bias } if bias

  tests.each do |target, wrongs|
    target = target.to_s
    wrongs.split(" ").each do |wrong|
      n += 1
      w = correct(wrong)
      next if w == target

      bad += 1
      # A count of 1 is what NWORDS reports for a word it has no entry for.
      unknown += 1 if NWORDS[target] == 1
      next unless verbose

      puts "#{wrong} => #{w}(#{NWORDS[w]}); expected #{target}(#{NWORDS[target]})"
    end
  end

  # Guard the empty test set, which would otherwise divide by zero.
  pct = n.zero? ? 0 : 100 - 100 * bad / n
  secs = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i

  { bad: bad.to_s, n: n.to_s, bias: bias.to_s, pct: pct.to_s, unknown: unknown.to_s, secs: secs.to_s }
end
# Spelling test set passed to spelltest below. Each key is a correctly
# spelled target word (a Symbol) and each value is a String holding one or
# more misspellings of that word separated by single spaces.
tests1 = { access: 'acess', accessing: 'accesing', accommodation:
'accomodation acommodation acomodation', account: 'acount', address:
'adress adres', addressable: 'addresable', arranged: 'aranged arrainged',
arrangeing: 'aranging', arrangement: 'arragment', articles: 'articals',
aunt: 'annt anut arnt', auxiliary: 'auxillary', available: 'avaible',
awful: 'awfall afful', basically: 'basicaly', beginning: 'begining',
benefit: 'benifit', benefits: 'benifits', between: 'beetween', bicycle:
'bicycal bycicle bycycle', biscuits:
'biscits biscutes biscuts bisquits buiscits buiscuts', built: 'biult',
cake: 'cak', career: 'carrer',
cemetery: 'cemetary semetary', centrally: 'centraly', certain: 'cirtain',
challenges: 'chalenges chalenges', chapter: 'chaper chaphter chaptur',
choice: 'choise', choosing: 'chosing', clerical: 'clearical',
committee: 'comittee', compare: 'compair', completely: 'completly',
consider: 'concider', considerable: 'conciderable', contented:
'contenpted contende contended contentid', curtains:
'cartains certans courtens cuaritains curtans curtians curtions', decide: 'descide', decided:
'descided', definitely: 'definately difinately', definition: 'defenition',
definitions: 'defenitions', description: 'discription', desiccate:
'desicate dessicate dessiccate', diagrammatically: 'diagrammaticaally',
different: 'diffrent', driven: 'dirven', ecstasy: 'exstacy ecstacy',
embarrass: 'embaras embarass', establishing: 'astablishing establising',
experience: 'experance experiance', experiences: 'experances', extended:
'extented', extremely: 'extreamly', fails: 'failes', families: 'familes',
february: 'febuary', further: 'futher', gallery: 'galery gallary gallerry gallrey',
hierarchal: 'hierachial', hierarchy: 'hierchy', inconvenient:
'inconvienient inconvient inconvinient', independent: 'independant independant',
initial: 'intial', initials: 'inetials inistals initails initals intials',
juice: 'guic juce jucie juise juse', latest: 'lates latets latiest latist',
laugh: 'lagh lauf laught lugh', level: 'leval',
levels: 'levals', liaison: 'liaision liason', lieu: 'liew', literature:
'litriture', loans: 'lones', locally: 'localy', magnificent:
'magnificnet magificent magnifcent magnifecent magnifiscant magnifisent magnificant',
management: 'managment', meant: 'ment', minuscule: 'miniscule',
minutes: 'muinets', monitoring: 'monitering', necessary:
'neccesary necesary neccesary necassary necassery neccasary', occurrence:
'occurence occurence', often: 'ofen offen offten ofton', opposite:
'opisite oppasite oppesite oppisit oppisite opposit oppossite oppossitte', parallel:
'paralel paralell parrallel parralell parrallell', particular: 'particulaur',
perhaps: 'perhapse', personnel: 'personnell', planned: 'planed', poem:
'poame', poems: 'poims pomes', poetry: 'poartry poertry poetre poety powetry',
position: 'possition', possible: 'possable', pretend:
'pertend protend prtend pritend', problem: 'problam proble promblem proplen',
pronunciation: 'pronounciation', purple: 'perple perpul poarple',
questionnaire: 'questionaire', really: 'realy relley relly', receipt:
'receit receite reciet recipt', receive: 'recieve', refreshment:
'reafreshment refreshmant refresment refressmunt', remember: 'rember remeber rememmer rermember',
remind: 'remine remined', scarcely: 'scarcly scarecly scarely scarsely',
scissors: 'scisors sissors', separate: 'seperate',
singular: 'singulaur', someone: 'somone', sources: 'sorces', southern:
'southen', special: 'speaical specail specal speical', splendid:
'spledid splended splened splended', standardizing: 'stanerdizing', stomach:
'stomac stomache stomec stumache', supersede: 'supercede superceed', there: 'ther',
totally: 'totaly', transferred: 'transfred', transportability:
'transportibility', triangular: 'triangulaur', understand: 'undersand undistand',
unexpected: 'unexpcted unexpeted unexspected', unfortunately:
'unfortunatly', unique: 'uneque', useful: 'usefull', valuable: 'valubale valuble',
variable: 'varable', variant: 'vairiant', various: 'vairious',
visited: 'fisited viseted vistid vistied', visitors: 'vistors',
voluntary: 'volantry', voting: 'voteing', wanted: 'wantid wonted',
whether: 'wether', wrote: 'rote wote'}
# Run the spelling test and print its summary only when this file is
# executed directly as a script.
puts spelltest(tests1) if $PROGRAM_NAME == __FILE__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment