Skip to content

Instantly share code, notes, and snippets.

@timm
Last active April 6, 2023 01:53
Show Gist options
  • Save timm/9601bfb8ae4267c0d1ffb0ad580a8655 to your computer and use it in GitHub Desktop.
Save timm/9601bfb8ae4267c0d1ffb0ad580a8655 to your computer and use it in GitHub Desktop.
xfun : semi-supervised multi-objective explanation (in LISP)
; vim: set ts=2 sw=2 sts=2 et :
(load "lib")
(defvar *help* "
xfun.lisp: LISP code for multi-objective semi-supervised explanations
OPTIONS:
-b bins max bin numbers = 16
-f file csv file = ../data/auto93.csv
-g go start up action = nothing
-h help show help = nil
-K k Bayesian K = 1
-m max max num cached = 512
-M m Bayesian M = 2
-p p dist coefficient = 2
-s seed random number seed = 10013")
(setf *settings* (settings *help*))
;-------------------------------------------------------------------------------
;## Structs
(defstruct data
"stores `rows`, summarized in `cols`"
rows cols)
(defstruct cols
"stores everything in `all`, independent/dependent things in `x`,`y`"
all x y names klass)
(defstruct num
"summarizes a stream of numbers"
(at 0) (txt "") (n 0) (w 1) ; w=1,-1 means "maximize", "minimize"
(hi most-negative-fixnum)
(lo most-positive-fixnum)
(mu 0) (m2 0))
(defstruct sym
"summarizes a stream of symbols"
(at 0) (txt "") (n 0) (w 1) has (most 0) mode)
;-------------------------------------------------------------------------------
;## Create
(defun lst->cols (lst &aux (self (make-cols :names lst)))
"column names to cols, then added to `all` and (maybe) `x`, `y`, and `klass`"
(with-slots (all x y klass) self
(loop :for at :from 0 :and txt :in lst :do
(let* ((isNum (and (> (length txt) 0) (upper-case-p (char txt 0))))
(what (if isNum #'make-num #'make-sym))
(col (funcall what :at at :txt txt :w (if (got txt -1 #\-) -1 1))))
(push col all)
(unless (got txt -1 #\X)
(if (got txt -1 #\! txt) (setf klass col))
(if (got txt -1 #\! #\+ #\-) (push col y) (push col x))))))
self)
(defun src->data (src &optional rows &aux (self (make-data)))
"from file if (stringp src); from list if (consp src); mimic structure if (data-p src)"
(labels ((row (x) (add self x)))
(cond ((stringp src) (with-file src #'row))
((consp src) (mapc #'row src))
((data-p src) (row (cols-names (data-cols src)))))
(mapc #'row rows)
self))
;-------------------------------------------------------------------------------
;## Add
(defmethod add ((self data) lst)
"updates `rows` and `cols`"
(aif (data-cols self)
(push (add it lst) (data-rows self))
(setf (data-cols self) (lst->cols lst))))
(defmethod add ((self cols) lst)
"update nums and syms"
(dolist (tmp `(,(cols-x self) ,(cols-y self)) lst)
(dolist (col tmp)
(add col (elt lst (slot-value col 'at))))))
(defmethod add ((self sym) x)
"update frequency counts (in `has`) and `most` and `mode`"
(with-slots (has n mode most) self
(unless (eql x #\?)
(incf n)
(incf (freq x has))
(if (> (freq x has) most) (setf most (freq x has) mode x)))))
(defmethod add ((self num) x ) ;;; Add one thing, updating 'lo,hi'
"updates `lo`, `hi`, `mu`, `sd`"
(with-slots (n lo hi mu m2) self
(unless (eq x #\?)
(incf n)
(let ((d (- x mu)))
(incf mu (/ d n))
(incf m2 (* d (- x mu)))
(setf lo (min x lo)
hi (max x hi))))))
;-------------------------------------------------------------------------------
;## Queries
(defmethod mid ((self sym)) (sym-mode self))
(defmethod mid ((self num)) (num-mu self))
(defmethod div ((self sym))
"diversity (entropy)."
(with-slots (has n) self
(labels ((fun (p) (if (<= p 0) 0 (* -1 (* p (log p 2))))))
(loop for (_ . n1) in has sum (fun (/ n1 n))))))
(defmethod div ((self num))
"return standard deviation"
(with-slots (n m2) self (if (<= n 1) 0 (sqrt (/ m2 (- n 1))))))
(defmethod like1 ((self sym) x prior)
(with-slots (n has) self
(/ (+ (freq x has) (* (? m) prior))
(+ n (? m)))))
(defmethod like1 ((self num) x _)
(with-slots (mu n) self
(let ((sd (div self))
(tiny 1E-32))
(cond ((< x (- mu (* 4 sd))) 0)
((> x (+ mu (* 4 sd))) 0)
(t (let ((denom (sqrt (* 2 pi sd sd)))
(nom (exp (/ (* -1 (expt (- x mu) 2))
(+ tiny (* 2 sd sd))))))
(/ nom (+ denom tiny))))))))
'(defmethod like ((self data) row nall nh)
(with-slots (rows cols) self
(let ((prior (/ (1+ (? k) (length rows)) (+ nall (* nh (? k)))))
(+ (log prior)
(loop :for col :in (cols-x cols) :sum
(let ((x (elt row (col-at col))))
(if (eql x #\?)
0
(log (like1 col x prior))))))))))
(defmethod classify ((self data) row hs &aux out (most most-negative-fixnum))
(dolist (h hs (values out most))
(let ((tmp (like h row (data-n self) (1+ (length hs)))))
(if (> tmp most) (setq most tmp
out h)))))
; vi: set ts=2 sw=2 sts=2 et :
;## Globals
(defvar *settings* nil)
;## Macros
(defmacro ? (x &optional (lst '*settings*))
"alist accessor, defaults to searching `*settings*`"
`(cdr (assoc ',x ,lst :test #'equalp)))
(defmacro aif (test then &optional else)
"used to test on a result that is also needed by `then`"
`(let ((it ,test)) (if it ,then ,else)))
(defmacro freq (x lst &optional (init 0))
"frequency counts for small group of symbols (say, less than 50)"
`(cdr (or (assoc ,x ,lst :test #'equal)
(car (setf ,lst (cons (cons ,x ,init) ,lst))))))
;-------------------------------------------------------------------------------
;## Sys
(defun args ()
"accessing command-line flats"
#+clisp ext:*args*
#+sbcl sb-ext:*posix-argv*)
(defun goodbye (&optional (x 0))
"quit list"
#+clisp (ext:exit x)
#+sbcl (sb-ext:exit :code x))
;-------------------------------------------------------------------------------
(defvar *seed* 10013)
(defun rand (&optional (n 1))
"random float 0.. < n"
(setf *seed* (mod (* 16807.0d0 *seed*) 2147483647.0d0))
(* n (- 1.0d0 (/ *seed* 2147483647.0d0))))
(defun rint (&optional (n 1) &aux (base 10000000000.0))
"random int 0..n-1"
(floor (* n (/ (rand base) base))))
;-------------------------------------------------------------------------------
;## Lists
(defun per (seq &optional (p .5))
(elt seq (floor (* (min .999999 (max 0 p)) (length seq)))))
;-------------------------------------------------------------------------------
;## Strings
(defun trim (s)
"kill whitespace at start, at end"
(string-trim '(#\Space #\Tab #\Newline) s))
(defun got (s n &rest chars)
"Does `s` hold any of `chars` at position `n` (negative `n` means 'from end of string')"
(let ((n (if (>= n 0) n (+ (length s) n))))
(if (and (stringp s) (>= (1- (length s)) n))
(dolist (c chars)
(if (eql c (char s n))
(return-from got t))))))
(defun split (s &optional (sep #\,) (filter #'thing) (here 0))
"split `s`, divided by `sep` filtered through `filter`"
(let* ((there (position sep s :start here))
(word (funcall filter (subseq s here there))))
(labels ((tail () (if there (split s sep filter (1+ there)))))
(if (equal word "") (tail) (cons word (tail))))))
(defun words (s)
"divide a string on space"
(split s #\Space #'trim))
;-------------------------------------------------------------------------------
;## Strings to Things
(defun thing (s &aux (s1 (trim s)))
"coerce `s` into a number or string or t or nil or #\?"
(cond ((equal s1 "?") #\?)
((equal s1 "t") t)
((equal s1 "nil") nil)
(t (let ((n (read-from-string s1 nil nil)))
(if (numberp n) n s1)))))
(defun with-file (file fun &optional (filter #'split))
"call `fun` for each line in `file`"
(with-open-file (s file)
(loop (funcall fun (funcall filter (or (read-line s nil) (return)))))))
;-------------------------------------------------------------------------------
;## Settings
(defun settings (s &optional args)
"for lines like ' -Key Flag ..... Default', return `(KEY . DEFAULT)`"
(loop
:for (flag key . lst)
:in (split s #\NewLine #'words)
:if (got flag 0 #\-)
:collect (cons (intern (string-upcase key))
(cli args flag (thing (car (last lst)))))))
(defun cli (lst flag b4)
"if `flag` in `lst`, then update `b4` from `lst`"
(aif (member flag lst :test #'equal)
(cond ((eql b4 t) nil)
((eql b4 nil) t)
(t (thing (second it))))
b4))
#!/usr/bin/env bash
rlwrap sbcl --noinform -
-include ../config/do.mk
DO_what= xfun: semi-supervised multi-objective explanation (in LISP)
DO_copyright= Copyright (c) 2023 Tim Menzies, BSD-2.
DO_repos= . ../config ../data
install: $(DO_repos) ## get related repos
brew install rlwrap clisp sbcl
../data:
(cd ..; git clone https://gist.github.com/d47b8699d9953eef14d516d6e54e742e.git data)
../config:
(cd ..; git clone https://gist.github.com/42f78b8beec9e98434b55438f9983ecc.git config)
#!/usr/bin/env bash
f=$1
shift
$(which sbcl) --noinform --script $f $* \
2> >( gawk '/^Backtrace / {exit} 1' )
; vi: set ts=2 sw=2 sts=2 et :
(load "code")
(defun tests ()
`((rand
,(lambda (&aux (n (make-num)))
(dotimes (i 1000) (add n (expt (rand) 2)))
(print (mid n))
(print (div n))
(assert (<= .35 (mid n) .36))
(assert (<= .30 (div n) .31))
t))
(num
,(lambda (&aux (n (make-num)))
(dotimes (i 1000) (add n i))
(assert (<= 498 (mid n) 502))))
(sym
,(lambda (&aux (s (make-sym)))
(dolist (x '(a a a a b b c)) (add s x))
(assert (<= 1.37 (div s) 1.38) () "sym")))
(data
,(lambda (&aux (d (src->data (? file))))
(assert (eql 398 (length (data-rows d))))
(assert (eql 4 (length (cols-x (data-cols d)))))))
))
(let ((fails 0)
(b4 (copy-tree (setf *settings* (settings *help* (args))))))
(if (? help)
(format t "~a~%" *help*)
(loop :for (key fun) :in (tests) :do
(setf *settings* (copy-tree b4)
*seed* (? seed))
(when (member (? go) (list "all" key)
:key #'string-downcase :test #'equalp)
(format t "~%⚠️ ~a ~a " key fun)
(cond ((funcall fun) (format t " PASSED ✅~%"))
(t (format t " FAILED ❌~%")
(incf fails))))))
(goodbye fails))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment