Created
September 9, 2011 03:23
-
-
Save nowl/1205421 to your computer and use it in GitHub Desktop.
Common Lisp code to generate random words based on next-letter frequency from a predefined corpus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defparameter *valid-chars* '(#\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z)) | |
(defparameter *table* nil) | |
(defparameter *norm-table* nil) | |
(defun build-table () | |
(setf *table* (make-hash-table)) | |
(with-open-file (in #p"corpus.txt" :direction :input) | |
(loop with lower-c with c with p while (not (eql c :eof)) do | |
(setf c (read-char in nil :eof) | |
lower-c (and (characterp c) (char-downcase c))) | |
(when (member lower-c *valid-chars*) | |
(when p | |
(unless (gethash p *table*) | |
(setf (gethash p *table*) (make-hash-table))) | |
(let ((value (gethash lower-c (gethash p *table*)))) | |
(setf (gethash lower-c (gethash p *table*)) | |
(if value (1+ value) 1)))) | |
(setf p lower-c))))) | |
(defun sort-and-find-probs (list) | |
(let ((total (loop for val being the hash-value of list summing val))) | |
(sort (loop with sum = 0 for char being the hash-key of list using (hash-value val) collecting | |
(progn (incf sum (float (/ val total))) | |
(list char sum))) | |
#'< :key #'cadr))) | |
(defun build-norm-table () | |
(setf *norm-table* | |
(loop for char being the hash-key of *table* using (hash-value val) collecting | |
(list char (sort-and-find-probs val))))) | |
(defun build-case-aux (prob list) | |
`(cond ,@(loop for val in list collecting | |
`((< ,prob ,(second val)) ,(first val))))) | |
(defun build-case-aux2 (char prob) | |
`(case ,char | |
,@(loop for entry in *norm-table* collecting | |
(list (car entry) (build-case-aux prob (cadr entry)))))) | |
(defmacro build-case (char prob) | |
(build-case-aux2 char prob)) | |
(defun weighted-random-next-char (char) | |
(let ((p (random 1.0))) | |
(build-case char p))) | |
(defun random-choice (list) | |
(let ((r (random (length list)))) | |
(nth r list))) | |
(defun random-word (length &key (start-char nil)) | |
(unless start-char | |
(setf start-char (random-choice *valid-chars*))) | |
(coerce | |
(cons start-char | |
(loop for i below (1- length) collecting | |
(let ((next-char (weighted-random-next-char start-char))) | |
(setf start-char next-char)))) | |
'string)) | |
; (build-table) | |
; (build-norm-table) | |
; (random-word 10) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment