Skip to content

Instantly share code, notes, and snippets.

@jl2
Last active August 29, 2015 14:11
Show Gist options
  • Save jl2/ad4eb2518d0fbab723fa to your computer and use it in GitHub Desktop.
Save jl2/ad4eb2518d0fbab723fa to your computer and use it in GitHub Desktop.
Association list based trie data structure in Common Lisp. Faster more memory efficient, and handles unicode better than the vector based trie I tried before.
;;;; package.lisp
(defpackage #:trie
(:use #:cl)
(:export #:create
#:add
#:contains
#:words
#:with-prefix
#:from-file
#:to-graph))
;;;; trie.asd
(asdf:defsystem #:trie
:description "Simple trie data structure in Common Lisp"
:author "Jeremiah LaRocco <[email protected]>"
:license "ISC"
:serial t
:depends-on (#:cl-ppcre)
:components ((:file "package")
(:file "trie")))
;;;; trie.lisp
(in-package #:trie)
(defstruct trie
(is-last nil)
(children '() :type list))
(defun add (tr word)
"Add the word to the trie."
(loop
with ct = tr
for c across word
do
(if (not (assoc c (trie-children ct)))
(setf (trie-children ct) (acons c (make-trie) (trie-children ct))))
(setf ct (cdr (assoc c (trie-children ct))))
finally (setf (trie-is-last ct) t)))
(defun contains (tr word)
"Test if the word is in the trie."
(loop
with ct = tr
for c across word
do
(if (assoc c (trie-children ct))
(setf ct (cdr (assoc c (trie-children ct))))
(return-from contains nil))
finally (return-from contains (trie-is-last ct))))
(defun words (tr start)
"Find all words in the trie, assuming they start with the word start."
(let ((rval nil))
(if (trie-is-last tr)
(setf rval (cons start rval)))
(dolist (nt (trie-children tr))
(setf rval (append rval (words (cdr nt) (concatenate 'string start (string (car nt)))))))
rval))
(defun with-prefix (tr prefix)
"Find words in the trie that start with the specified prefix."
(loop
with ct = tr
for c across prefix
do
(if (assoc c (trie-children ct))
(setf ct (cdr (assoc c (trie-children ct))))
(return-from with-prefix nil))
finally (return-from with-prefix (words ct prefix))))
(defun from-file (fname)
"Create a trie and populate it with the words from the specified file."
(with-open-file
(inf fname)
(loop
with tr = (make-trie)
for line = (read-line inf nil)
while line
do
(dolist (word (ppcre:split "\\s+" (ppcre:regex-replace-all "[-?!_%$@*\"',\\/.]" line "")))
(add tr word))
finally (return tr))))
(defun create (&rest words)
"Create a trie data structure, and optionally populate it with the given words."
(let ((rval (make-trie)))
(dolist (word words)
(add rval word))
rval))
(defun to-graph-inner (prefix tr stream)
"Helper function for generating GraphViz graph files from a trie."
(dolist (n (trie-children tr))
(let* ((next-letter (car n))
(next-word (concatenate 'string prefix (string next-letter)))
(shape (if (trie-is-last (cdr n)) "doublecircle" "circle")))
(format stream "\"~a\" -> \"~a\"~%\"~a\" [label=\"~a\" shape=~a]~%" prefix next-word next-word next-letter shape)
(to-graph-inner next-word (cdr n) stream))))
(defun to-graph (tr fname)
"Generate a graph in GraphViz format for the trie."
(with-open-file
(stream fname :direction :output :if-exists :supersede :if-does-not-exist :create)
(format stream "digraph {~%\"start_node\" [label=\"\" shape=circle]~%")
(dolist (n (trie-children tr))
(let ((shape (if (trie-is-last (cdr n)) "doublecircle" "circle"))
(this-letter (car n)))
(format stream "\"start_node\" -> \"~a\"~%\"~a\" [label=\"~a\" shape=~a]~%" this-letter this-letter this-letter shape)
(to-graph-inner (string this-letter) (cdr n) stream)))
(format stream "}~%")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment