Last active
December 23, 2015 23:59
-
-
Save theJenix/6713887 to your computer and use it in GitHub Desktop.
k-means clustering in LISP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; K-means Clustering | |
(defun diff-sq (x y) | |
(expt (- x y) 2)) | |
(defun squared-differences (l1 l2) | |
(mapcar (lambda(x y) (float (expt (- x y) 2))) l1 l2)) | |
(defun sum-squared-differences (l1 l2) | |
(reduce #'+ (squared-differences l1 l2))) | |
(defun laverage (args) | |
(when args | |
(/ (reduce #'+ args) (length args)))) | |
(defun argx (fun tuples) | |
(cadr (find (reduce fun tuples :key #'car) tuples :key #'car))) | |
; Takes in a list of tuples (2 element nested lists) in the following | |
; format: | |
; (value, argument) | |
; and returns the argument from the minimum value | |
(defun argmin (tuples) | |
(argx #'min tuples)) | |
(defun argmax (tuples) | |
(argx #'max tuples)) | |
; Return a list of distance,center tuples | |
(defun compute-distance (centers x) | |
(loop for center in centers collect (list (diff-sq center x) center))) | |
(defun assign-center (centers x) | |
(argmin (compute-distance centers x))) | |
; Given a set of centers and a point, assign each point to the one closest center | |
; Ties are broken by assigning x to the first center encountered | |
(defun assign-centers (centers xs) | |
(loop for x in xs collect (list x (assign-center centers x)))) | |
(defun estimate-center (assigned) | |
(laverage (mapcar #'car assigned))) | |
(defun estimate-centers (assigned centers) | |
(loop for c in centers | |
collect (estimate-center (remove-if (complement (lambda (x) (= (cadr x) c))) assigned)))) | |
(defun rand-range (minval maxval) | |
(+ minval (random (1+ (- maxval minval))))) | |
(defun generate-k-random (k minval maxval) | |
(format t "Generating ~d random values from between ~d and ~d~C" k minval maxval #\linefeed) | |
(loop for i from 1 upto k collect (rand-range minval maxval))) | |
(defun k-means-clustering (data k epsilon) | |
(let ((centers (generate-k-random k (reduce #'min data) (reduce #'max data)))) | |
(loop for i from 1 do | |
(format t "Iteration ~d" i) | |
(setf last_centers centers) | |
(setf assigned (assign-centers centers data)) | |
(setf centers (estimate-centers assigned centers)) | |
(setf sse (sum-squared-differences centers last_centers)) | |
(format t " - Change of ~f (SSD)~C" sse #\linefeed) | |
while (>= sse epsilon)) | |
(print (mapcar #'round centers)) | |
(identity centers))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment