benob · March 20, 2022 13:41
diff --git a/symspell.py b/symspell.py
 import re
 from collections import Counter, defaultdict

 # create lexicon with word frequency from big text
 def words(text):
     """Return every run of word characters in *text*, lowercased, in order."""
     lowered = text.lower()
     return re.findall(r'\w+', lowered)
 # Word -> occurrence count for the whole corpus. The file is read inside a
 # context manager so the handle is closed as soon as the text is loaded,
 # instead of staying open until the garbage collector reclaims it.
 with open('big.txt') as corpus:
     WORDS = Counter(words(corpus.read()))

 # generate all deletion edits, plus original
 def edits(word):
     """Return all single-character deletions of *word*, then *word* itself.

     Deletions are listed in order of the removed character's index, and
     duplicates are kept, so the result always has len(word) + 1 entries.
     """
     variants = [word[:cut] + word[cut + 1:] for cut in range(len(word))]
     variants.append(word)
     return variants

 # expand lexicon with edited words
 EXPANDED = defaultdict(list)
 # Walk every (deletion, word) pair: each lexicon word is filed under each of
 # its single-character deletions and under its own spelling.
 for deletion, word in ((d, w) for w in WORDS for d in edits(w)):
     EXPANDED[deletion].append(word)
 
 # compute word frequency
 def P(word, N=sum(WORDS.values())):
     """Return the relative frequency of *word*: its count in WORDS over N.

     N defaults to the total of all counts in WORDS, evaluated once when
     this function is defined. WORDS is a Counter, so a word it does not
     contain has count 0.
     """
     occurrences = WORDS[word]
     return occurrences / N
 
 # match all 1-deletion edits from word to all 1-deletion edits from lexicon
 def correction(word):
     """Return the most frequent lexicon word sharing a deletion key with *word*.

     Each string from edits(word) (the word itself and its single-character
     deletions) is looked up in EXPANDED; the lexicon words found there are
     the candidates, and the one with the highest P() is returned. If no
     key matches, *word* is returned unchanged.
     """
     candidates = set()
     for deletion in edits(word):
         # .get() instead of EXPANDED[deletion]: indexing a defaultdict would
         # insert an empty list for every miss, growing the index per query.
         candidates.update(EXPANDED.get(deletion, ()))

     # With no candidate at all, fall back to the input rather than letting
     # max() raise ValueError on an empty set.
     return max(candidates, key=P, default=word)

 # Demo run: 'lenght' is a deliberately misspelled query; prints it next to
 # the lexicon word that correction() selects for it.
 print('lenght =>', correction('lenght'))
	import re
	from collections import Counter, defaultdict

	# create lexicon with word frequency from big text
	def words(text):
		"""Return every run of word characters in *text*, lowercased, in order."""
		lowered = text.lower()
		return re.findall(r'\w+', lowered)
	# Word -> occurrence count for the whole corpus. The file is read inside a
	# context manager so the handle is closed as soon as the text is loaded,
	# instead of staying open until the garbage collector reclaims it.
	with open('big.txt') as corpus:
		WORDS = Counter(words(corpus.read()))

	# generate all deletion edits, plus original
	def edits(word):
		"""Return all single-character deletions of *word*, then *word* itself.

		Deletions are listed in order of the removed character's index, and
		duplicates are kept, so the result always has len(word) + 1 entries.
		"""
		# Body indentation restored: the flattened paste left the def without
		# an indented suite, which does not parse.
		splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
		deletes = [L + R[1:] for L, R in splits if R]
		return deletes + [word]

	# expand lexicon with edited words
	# Index from deletion key to the lexicon words that produce it. Loop
	# nesting restored: the flattened paste left both loop bodies unindented.
	EXPANDED = defaultdict(list)
	for word in WORDS:
		# edits(word) gives the word itself plus each single-character deletion
		for deletion in edits(word):
			EXPANDED[deletion].append(word)

	# compute word frequency
	def P(word, N=sum(WORDS.values())):
		"""Return the relative frequency of *word*: its count in WORDS over N.

		N defaults to the total of all counts in WORDS, evaluated once when
		this function is defined. WORDS is a Counter, so a word it does not
		contain has count 0.
		"""
		# Body indentation restored; the flattened paste did not parse.
		return WORDS[word] / N

	# match all 1-deletion edits from word to all 1-deletion edits from lexicon
	def correction(word):
		"""Return the most frequent lexicon word sharing a deletion key with *word*.

		Each string from edits(word) (the word itself and its single-character
		deletions) is looked up in EXPANDED; the lexicon words found there are
		the candidates, and the one with the highest P() is returned. If no
		key matches, *word* is returned unchanged.
		"""
		candidates = set()
		for deletion in edits(word):
			# .get() instead of EXPANDED[deletion]: indexing a defaultdict would
			# insert an empty list for every miss, growing the index per query.
			candidates.update(EXPANDED.get(deletion, ()))

		# With no candidate at all, fall back to the input rather than letting
		# max() raise ValueError on an empty set.
		return max(candidates, key=P, default=word)

	# Demo run: 'lenght' is a deliberately misspelled query; prints it next to
	# the lexicon word that correction() selects for it.
	print('lenght =>', correction('lenght'))