Created
February 24, 2014 19:07
-
-
Save ncweinhold/9194748 to your computer and use it in GitHub Desktop.
Small bloom filter example based on the example at http://en.literateprograms.org/Bloom_filter_(C)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defvar *hashfunctions* 2) | |
(defvar *m* 2500000) | |
(defvar *bloom* (make-array *m* :initial-element nil)) | |
(defun add-word (word) | |
(let ((downcase-word (string-downcase word))) | |
(setf (elt *bloom* (mod (sax-hash downcase-word) *m*)) t) | |
(setf (elt *bloom* (mod (sdbm-hash downcase-word) *m*)) t))) | |
(defun contains-word (word) | |
(let ((downcase-word (string-downcase word))) | |
(and (elt *bloom* (mod (sax-hash downcase-word) *m*)) | |
(elt *bloom* (mod (sdbm-hash downcase-word) *m*))))) | |
(defun sdbm-hash (word) | |
(let* ((hashvalue 0)) | |
(loop for c across word do (setf hashvalue (- (+ (char-code c) | |
(ash hashvalue 6) | |
(ash hashvalue 16)) | |
hashvalue))) | |
hashvalue)) | |
(defun sax-hash (word) | |
(let* ((hashvalue 0)) | |
(loop for c across word do (setf hashvalue (logxor hashvalue | |
(+ (ash hashvalue 5) | |
(ash hashvalue -2) | |
(char-code c))))) | |
hashvalue)) | |
(defun read-words-file (path) | |
(with-open-file (stream path) | |
(do ((word (read-line stream nil) | |
(read-line stream nil))) | |
((null word)) | |
(add-word word)))) | |
(defun spell (word) | |
(contains-word word)) | |
(read-words-file "/usr/share/dict/words") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment