Skip to content

Instantly share code, notes, and snippets.

@jordonbiondo
Last active February 6, 2023 10:51
Show Gist options
  • Save jordonbiondo/111af9c304725391e378 to your computer and use it in GitHub Desktop.
Save jordonbiondo/111af9c304725391e378 to your computer and use it in GitHub Desktop.
norvig elisp spell checker
;; norvig checker in elisp
;; barely modified from https://github.com/mikaelj/snippets/blob/master/lisp/spellcheck/spellcheck.lisp
;; watch it run here: http://i.imgur.com/guuVT2O.gif
(defun file-words (file)
(let ((words nil))
(with-temp-buffer
(insert-file-contents file)
(goto-char 1)
(while (re-search-forward "[a-z]+" nil t 1)
(push (downcase (match-string 0)) words)))
words))
(defun train (words)
(let ((frequency (make-hash-table :test 'equal)))
(dolist (word words)
(setf (gethash word frequency) (1+ (gethash word frequency 1))))
frequency))
(defvar *freq* (train (file-words "big.txt")))
(defvar *alphabet* "abcdefghijklmnopqrstuvwxyz")
;;; edits of one character
(defun edits-1 (word)
(let* ((splits (loop for i from 0 upto (length word)
collecting (cons (subseq word 0 i) (subseq word i))))
(deletes (loop for (a . b) in splits
when (not (zerop (length b)))
collect (concatenate 'string a (subseq b 1))))
(transposes (loop for (a . b) in splits
when (> (length b) 1)
collect (concatenate 'string a (subseq b 1 2) (subseq b 0 1) (subseq b 2))))
(replaces (loop for (a . b) in splits
nconcing (loop for c across *alphabet*
when (not (zerop (length b)))
collect (concatenate 'string a (string c) (subseq b 1)))))
(inserts (loop for (a . b) in splits
nconcing (loop for c across *alphabet*
collect (concatenate 'string a (string c) b)))))
(nconc deletes transposes replaces inserts)))
(defun known-edits-2 (word)
(let ((edits nil))
(dolist (e1 (edits-1 word))
(dolist (e2 (edits-1 e1))
(let ((n (gethash e2 *freq*)))
(when n
(push (list n e2) edits)))))
edits))
(defun known (words)
(let ((known nil))
(dolist (word words)
(let ((n (gethash word *freq*)))
(when n
(push (list n word) known))))
known))
(defun correct (word)
(let ((word (downcase word)))
(let ((words (or (known (list word)) (known (edits-1 word)) (known-edits-2 word) (list (list 1 word)))))
(subseq (sort words (lambda (a b) (> (car a) (car b)))) 0 (min (length words) 5)))))
(require 'pulse)
(defun my-correct-word ()
(interactive)
(let* ((word (thing-at-point 'word t))
(candidates (correct (downcase word))))
(if (and (= (length candidates) 1)
(equal word (cadar candidates)))
(message "all good!")
(let ((chosen (ido-completing-read "choices: " (mapcar 'cadr candidates))))
(let* ((bounds (bounds-of-thing-at-point 'word))
(beg (car bounds))
(end (cdr bounds)))
(delete-region beg end)
(insert chosen)
(pulse-momentary-highlight-region beg (point))
(forward-word -1)
(forward-word 1))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment