Last active
February 6, 2023 10:51
-
-
Save jordonbiondo/111af9c304725391e378 to your computer and use it in GitHub Desktop.
norvig elisp spell checker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; norvig checker in elisp | |
;; barely modified from https://github.com/mikaelj/snippets/blob/master/lisp/spellcheck/spellcheck.lisp | |
;; watch it run here: http://i.imgur.com/guuVT2O.gif | |
(defun file-words (file) | |
(let ((words nil)) | |
(with-temp-buffer | |
(insert-file-contents file) | |
(goto-char 1) | |
(while (re-search-forward "[a-z]+" nil t 1) | |
(push (downcase (match-string 0)) words))) | |
words)) | |
(defun train (words) | |
(let ((frequency (make-hash-table :test 'equal))) | |
(dolist (word words) | |
(setf (gethash word frequency) (1+ (gethash word frequency 1)))) | |
frequency)) | |
(defvar *freq* (train (file-words "big.txt"))) | |
(defvar *alphabet* "abcdefghijklmnopqrstuvwxyz") | |
;;; edits of one character | |
(defun edits-1 (word) | |
(let* ((splits (loop for i from 0 upto (length word) | |
collecting (cons (subseq word 0 i) (subseq word i)))) | |
(deletes (loop for (a . b) in splits | |
when (not (zerop (length b))) | |
collect (concatenate 'string a (subseq b 1)))) | |
(transposes (loop for (a . b) in splits | |
when (> (length b) 1) | |
collect (concatenate 'string a (subseq b 1 2) (subseq b 0 1) (subseq b 2)))) | |
(replaces (loop for (a . b) in splits | |
nconcing (loop for c across *alphabet* | |
when (not (zerop (length b))) | |
collect (concatenate 'string a (string c) (subseq b 1))))) | |
(inserts (loop for (a . b) in splits | |
nconcing (loop for c across *alphabet* | |
collect (concatenate 'string a (string c) b))))) | |
(nconc deletes transposes replaces inserts))) | |
(defun known-edits-2 (word) | |
(let ((edits nil)) | |
(dolist (e1 (edits-1 word)) | |
(dolist (e2 (edits-1 e1)) | |
(let ((n (gethash e2 *freq*))) | |
(when n | |
(push (list n e2) edits))))) | |
edits)) | |
(defun known (words) | |
(let ((known nil)) | |
(dolist (word words) | |
(let ((n (gethash word *freq*))) | |
(when n | |
(push (list n word) known)))) | |
known)) | |
(defun correct (word) | |
(let ((word (downcase word))) | |
(let ((words (or (known (list word)) (known (edits-1 word)) (known-edits-2 word) (list (list 1 word))))) | |
(subseq (sort words (lambda (a b) (> (car a) (car b)))) 0 (min (length words) 5))))) | |
(require 'pulse) | |
(defun my-correct-word () | |
(interactive) | |
(let* ((word (thing-at-point 'word t)) | |
(candidates (correct (downcase word)))) | |
(if (and (= (length candidates) 1) | |
(equal word (cadar candidates))) | |
(message "all good!") | |
(let ((chosen (ido-completing-read "choices: " (mapcar 'cadr candidates)))) | |
(let* ((bounds (bounds-of-thing-at-point 'word)) | |
(beg (car bounds)) | |
(end (cdr bounds))) | |
(delete-region beg end) | |
(insert chosen) | |
(pulse-momentary-highlight-region beg (point)) | |
(forward-word -1) | |
(forward-word 1)))))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment