Last active
August 29, 2015 14:11
-
-
Save jl2/ad4eb2518d0fbab723fa to your computer and use it in GitHub Desktop.
Association list based trie data structure in Common Lisp. Faster more memory efficient, and handles unicode better than the vector based trie I tried before.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;; package.lisp | |
(defpackage #:trie | |
(:use #:cl) | |
(:export #:create | |
#:add | |
#:contains | |
#:words | |
#:with-prefix | |
#:from-file | |
#:to-graph)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;; trie.asd | |
(asdf:defsystem #:trie | |
:description "Simple trie data structure in Common Lisp" | |
:author "Jeremiah LaRocco <[email protected]>" | |
:license "ISC" | |
:serial t | |
:depends-on (#:cl-ppcre) | |
:components ((:file "package") | |
(:file "trie"))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;; trie.lisp | |
(in-package #:trie) | |
(defstruct trie | |
(is-last nil) | |
(children '() :type list)) | |
(defun add (tr word) | |
"Add the word to the trie." | |
(loop | |
with ct = tr | |
for c across word | |
do | |
(if (not (assoc c (trie-children ct))) | |
(setf (trie-children ct) (acons c (make-trie) (trie-children ct)))) | |
(setf ct (cdr (assoc c (trie-children ct)))) | |
finally (setf (trie-is-last ct) t))) | |
(defun contains (tr word) | |
"Test if the word is in the trie." | |
(loop | |
with ct = tr | |
for c across word | |
do | |
(if (assoc c (trie-children ct)) | |
(setf ct (cdr (assoc c (trie-children ct)))) | |
(return-from contains nil)) | |
finally (return-from contains (trie-is-last ct)))) | |
(defun words (tr start) | |
"Find all words in the trie, assuming they start with the word start." | |
(let ((rval nil)) | |
(if (trie-is-last tr) | |
(setf rval (cons start rval))) | |
(dolist (nt (trie-children tr)) | |
(setf rval (append rval (words (cdr nt) (concatenate 'string start (string (car nt))))))) | |
rval)) | |
(defun with-prefix (tr prefix) | |
"Find words in the trie that start with the specified prefix." | |
(loop | |
with ct = tr | |
for c across prefix | |
do | |
(if (assoc c (trie-children ct)) | |
(setf ct (cdr (assoc c (trie-children ct)))) | |
(return-from with-prefix nil)) | |
finally (return-from with-prefix (words ct prefix)))) | |
(defun from-file (fname) | |
"Create a trie and populate it with the words from the specified file." | |
(with-open-file | |
(inf fname) | |
(loop | |
with tr = (make-trie) | |
for line = (read-line inf nil) | |
while line | |
do | |
(dolist (word (ppcre:split "\\s+" (ppcre:regex-replace-all "[-?!_%$@*\"',\\/.]" line ""))) | |
(add tr word)) | |
finally (return tr)))) | |
(defun create (&rest words) | |
"Create a trie data structure, and optionally populate it with the given words." | |
(let ((rval (make-trie))) | |
(dolist (word words) | |
(add rval word)) | |
rval)) | |
(defun to-graph-inner (prefix tr stream) | |
"Helper function for generating GraphViz graph files from a trie." | |
(dolist (n (trie-children tr)) | |
(let* ((next-letter (car n)) | |
(next-word (concatenate 'string prefix (string next-letter))) | |
(shape (if (trie-is-last (cdr n)) "doublecircle" "circle"))) | |
(format stream "\"~a\" -> \"~a\"~%\"~a\" [label=\"~a\" shape=~a]~%" prefix next-word next-word next-letter shape) | |
(to-graph-inner next-word (cdr n) stream)))) | |
(defun to-graph (tr fname) | |
"Generate a graph in GraphViz format for the trie." | |
(with-open-file | |
(stream fname :direction :output :if-exists :supersede :if-does-not-exist :create) | |
(format stream "digraph {~%\"start_node\" [label=\"\" shape=circle]~%") | |
(dolist (n (trie-children tr)) | |
(let ((shape (if (trie-is-last (cdr n)) "doublecircle" "circle")) | |
(this-letter (car n))) | |
(format stream "\"start_node\" -> \"~a\"~%\"~a\" [label=\"~a\" shape=~a]~%" this-letter this-letter this-letter shape) | |
(to-graph-inner (string this-letter) (cdr n) stream))) | |
(format stream "}~%"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment