Basic multi-objective random projections, [email protected]
Last active
April 6, 2023 01:53
-
-
Save timm/9601bfb8ae4267c0d1ffb0ad580a8655 to your computer and use it in GitHub Desktop.
xfun : semi-supervised multi-objective explanation (in LISP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; vim: set ts=2 sw=2 sts=2 et : | |
(load "lib") | |
(defvar *help* " | |
xfun.lisp: LISP code for multi-objective semi-supervised explanations | |
OPTIONS: | |
-b bins max bin numbers = 16 | |
-f file csv file = ../data/auto93.csv | |
-g go start up action = nothing | |
-h help show help = nil | |
-K k Bayesian K = 1 | |
-m max max num cached = 512 | |
-M m Bayesian M = 2 | |
-p p dist coefficient = 2 | |
-s seed random number seed = 10013") | |
(setf *settings* (settings *help*)) | |
;------------------------------------------------------------------------------- | |
;## Structs | |
(defstruct data | |
"stores `rows`, summarized in `cols`" | |
rows cols) | |
(defstruct cols | |
"stores everything in `all`, independent/dependent things in `x`,`y`" | |
all x y names klass) | |
(defstruct num | |
"summarizes a stream of numbers" | |
(at 0) (txt "") (n 0) (w 1) ; w=1,-1 means "maximize", "minimize" | |
(hi most-negative-fixnum) | |
(lo most-positive-fixnum) | |
(mu 0) (m2 0)) | |
(defstruct sym | |
"summarizes a stream of symbols" | |
(at 0) (txt "") (n 0) (w 1) has (most 0) mode) | |
;------------------------------------------------------------------------------- | |
;## Create | |
(defun lst->cols (lst &aux (self (make-cols :names lst))) | |
"column names to cols, then added to `all` and (maybe) `x`, `y`, and `klass`" | |
(with-slots (all x y klass) self | |
(loop :for at :from 0 :and txt :in lst :do | |
(let* ((isNum (and (> (length txt) 0) (upper-case-p (char txt 0)))) | |
(what (if isNum #'make-num #'make-sym)) | |
(col (funcall what :at at :txt txt :w (if (got txt -1 #\-) -1 1)))) | |
(push col all) | |
(unless (got txt -1 #\X) | |
(if (got txt -1 #\! txt) (setf klass col)) | |
(if (got txt -1 #\! #\+ #\-) (push col y) (push col x)))))) | |
self) | |
(defun src->data (src &optional rows &aux (self (make-data))) | |
"from file if (stringp src); from list if (consp src); mimic structure if (data-p src)" | |
(labels ((row (x) (add self x))) | |
(cond ((stringp src) (with-file src #'row)) | |
((consp src) (mapc #'row src)) | |
((data-p src) (row (cols-names (data-cols src))))) | |
(mapc #'row rows) | |
self)) | |
;------------------------------------------------------------------------------- | |
;## Add | |
(defmethod add ((self data) lst) | |
"updates `rows` and `cols`" | |
(aif (data-cols self) | |
(push (add it lst) (data-rows self)) | |
(setf (data-cols self) (lst->cols lst)))) | |
(defmethod add ((self cols) lst) | |
"update nums and syms" | |
(dolist (tmp `(,(cols-x self) ,(cols-y self)) lst) | |
(dolist (col tmp) | |
(add col (elt lst (slot-value col 'at)))))) | |
(defmethod add ((self sym) x) | |
"update frequency counts (in `has`) and `most` and `mode`" | |
(with-slots (has n mode most) self | |
(unless (eql x #\?) | |
(incf n) | |
(incf (freq x has)) | |
(if (> (freq x has) most) (setf most (freq x has) mode x))))) | |
(defmethod add ((self num) x ) ;;; Add one thing, updating 'lo,hi' | |
"updates `lo`, `hi`, `mu`, `sd`" | |
(with-slots (n lo hi mu m2) self | |
(unless (eq x #\?) | |
(incf n) | |
(let ((d (- x mu))) | |
(incf mu (/ d n)) | |
(incf m2 (* d (- x mu))) | |
(setf lo (min x lo) | |
hi (max x hi)))))) | |
;------------------------------------------------------------------------------- | |
;## Queries | |
(defmethod mid ((self sym)) (sym-mode self)) | |
(defmethod mid ((self num)) (num-mu self)) | |
(defmethod div ((self sym)) | |
"diversity (entropy)." | |
(with-slots (has n) self | |
(labels ((fun (p) (if (<= p 0) 0 (* -1 (* p (log p 2)))))) | |
(loop for (_ . n1) in has sum (fun (/ n1 n)))))) | |
(defmethod div ((self num)) | |
"return standard deviation" | |
(with-slots (n m2) self (if (<= n 1) 0 (sqrt (/ m2 (- n 1)))))) | |
(defmethod like1 ((self sym) x prior) | |
(with-slots (n has) self | |
(/ (+ (freq x has) (* (? m) prior)) | |
(+ n (? m))))) | |
(defmethod like1 ((self num) x _) | |
(with-slots (mu n) self | |
(let ((sd (div self)) | |
(tiny 1E-32)) | |
(cond ((< x (- mu (* 4 sd))) 0) | |
((> x (+ mu (* 4 sd))) 0) | |
(t (let ((denom (sqrt (* 2 pi sd sd))) | |
(nom (exp (/ (* -1 (expt (- x mu) 2)) | |
(+ tiny (* 2 sd sd)))))) | |
(/ nom (+ denom tiny)))))))) | |
'(defmethod like ((self data) row nall nh) | |
(with-slots (rows cols) self | |
(let ((prior (/ (1+ (? k) (length rows)) (+ nall (* nh (? k))))) | |
(+ (log prior) | |
(loop :for col :in (cols-x cols) :sum | |
(let ((x (elt row (col-at col)))) | |
(if (eql x #\?) | |
0 | |
(log (like1 col x prior)))))))))) | |
(defmethod classify ((self data) row hs &aux out (most most-negative-fixnum)) | |
(dolist (h hs (values out most)) | |
(let ((tmp (like h row (data-n self) (1+ (length hs))))) | |
(if (> tmp most) (setq most tmp | |
out h))))) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; vi: set ts=2 sw=2 sts=2 et : | |
;## Globals | |
(defvar *settings* nil) | |
;## Macros | |
(defmacro ? (x &optional (lst '*settings*)) | |
"alist accessor, defaults to searching `*settings*`" | |
`(cdr (assoc ',x ,lst :test #'equalp))) | |
(defmacro aif (test then &optional else) | |
"used to test on a result that is also needed by `then`" | |
`(let ((it ,test)) (if it ,then ,else))) | |
(defmacro freq (x lst &optional (init 0)) | |
"frequency counts for small group of symbols (say, less than 50)" | |
`(cdr (or (assoc ,x ,lst :test #'equal) | |
(car (setf ,lst (cons (cons ,x ,init) ,lst)))))) | |
;------------------------------------------------------------------------------- | |
;## Sys | |
(defun args () | |
"accessing command-line flats" | |
#+clisp ext:*args* | |
#+sbcl sb-ext:*posix-argv*) | |
(defun goodbye (&optional (x 0)) | |
"quit list" | |
#+clisp (ext:exit x) | |
#+sbcl (sb-ext:exit :code x)) | |
;------------------------------------------------------------------------------- | |
(defvar *seed* 10013) | |
(defun rand (&optional (n 1)) | |
"random float 0.. < n" | |
(setf *seed* (mod (* 16807.0d0 *seed*) 2147483647.0d0)) | |
(* n (- 1.0d0 (/ *seed* 2147483647.0d0)))) | |
(defun rint (&optional (n 1) &aux (base 10000000000.0)) | |
"random int 0..n-1" | |
(floor (* n (/ (rand base) base)))) | |
;------------------------------------------------------------------------------- | |
;## Lists | |
(defun per (seq &optional (p .5)) | |
(elt seq (floor (* (min .999999 (max 0 p)) (length seq))))) | |
;------------------------------------------------------------------------------- | |
;## Strings | |
(defun trim (s) | |
"kill whitespace at start, at end" | |
(string-trim '(#\Space #\Tab #\Newline) s)) | |
(defun got (s n &rest chars) | |
"Does `s` hold any of `chars` at position `n` (negative `n` means 'from end of string')" | |
(let ((n (if (>= n 0) n (+ (length s) n)))) | |
(if (and (stringp s) (>= (1- (length s)) n)) | |
(dolist (c chars) | |
(if (eql c (char s n)) | |
(return-from got t)))))) | |
(defun split (s &optional (sep #\,) (filter #'thing) (here 0)) | |
"split `s`, divided by `sep` filtered through `filter`" | |
(let* ((there (position sep s :start here)) | |
(word (funcall filter (subseq s here there)))) | |
(labels ((tail () (if there (split s sep filter (1+ there))))) | |
(if (equal word "") (tail) (cons word (tail)))))) | |
(defun words (s) | |
"divide a string on space" | |
(split s #\Space #'trim)) | |
;------------------------------------------------------------------------------- | |
;## Strings to Things | |
(defun thing (s &aux (s1 (trim s))) | |
"coerce `s` into a number or string or t or nil or #\?" | |
(cond ((equal s1 "?") #\?) | |
((equal s1 "t") t) | |
((equal s1 "nil") nil) | |
(t (let ((n (read-from-string s1 nil nil))) | |
(if (numberp n) n s1))))) | |
(defun with-file (file fun &optional (filter #'split)) | |
"call `fun` for each line in `file`" | |
(with-open-file (s file) | |
(loop (funcall fun (funcall filter (or (read-line s nil) (return))))))) | |
;------------------------------------------------------------------------------- | |
;## Settings | |
(defun settings (s &optional args) | |
"for lines like ' -Key Flag ..... Default', return `(KEY . DEFAULT)`" | |
(loop | |
:for (flag key . lst) | |
:in (split s #\NewLine #'words) | |
:if (got flag 0 #\-) | |
:collect (cons (intern (string-upcase key)) | |
(cli args flag (thing (car (last lst))))))) | |
(defun cli (lst flag b4) | |
"if `flag` in `lst`, then update `b4` from `lst`" | |
(aif (member flag lst :test #'equal) | |
(cond ((eql b4 t) nil) | |
((eql b4 nil) t) | |
(t (thing (second it)))) | |
b4)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
rlwrap sbcl --noinform - |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-include ../config/do.mk | |
DO_what= xfun: semi-supervised multi-objective explanation (in LISP) | |
DO_copyright= Copyright (c) 2023 Tim Menzies, BSD-2. | |
DO_repos= . ../config ../data | |
install: $(DO_repos) ## get related repos | |
brew install rlwrap clisp sbcl | |
../data: | |
(cd ..; git clone https://gist.github.com/d47b8699d9953eef14d516d6e54e742e.git data) | |
../config: | |
(cd ..; git clone https://gist.github.com/42f78b8beec9e98434b55438f9983ecc.git config) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
f=$1 | |
shift | |
$(which sbcl) --noinform --script $f $* \ | |
2> >( gawk '/^Backtrace / {exit} 1' ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; vi: set ts=2 sw=2 sts=2 et : | |
(load "code") | |
(defun tests () | |
`((rand | |
,(lambda (&aux (n (make-num))) | |
(dotimes (i 1000) (add n (expt (rand) 2))) | |
(print (mid n)) | |
(print (div n)) | |
(assert (<= .35 (mid n) .36)) | |
(assert (<= .30 (div n) .31)) | |
t)) | |
(num | |
,(lambda (&aux (n (make-num))) | |
(dotimes (i 1000) (add n i)) | |
(assert (<= 498 (mid n) 502)))) | |
(sym | |
,(lambda (&aux (s (make-sym))) | |
(dolist (x '(a a a a b b c)) (add s x)) | |
(assert (<= 1.37 (div s) 1.38) () "sym"))) | |
(data | |
,(lambda (&aux (d (src->data (? file)))) | |
(assert (eql 398 (length (data-rows d)))) | |
(assert (eql 4 (length (cols-x (data-cols d))))))) | |
)) | |
(let ((fails 0) | |
(b4 (copy-tree (setf *settings* (settings *help* (args)))))) | |
(if (? help) | |
(format t "~a~%" *help*) | |
(loop :for (key fun) :in (tests) :do | |
(setf *settings* (copy-tree b4) | |
*seed* (? seed)) | |
(when (member (? go) (list "all" key) | |
:key #'string-downcase :test #'equalp) | |
(format t "~%⚠️ ~a ~a " key fun) | |
(cond ((funcall fun) (format t " PASSED ✅~%")) | |
(t (format t " FAILED ❌~%") | |
(incf fails)))))) | |
(goodbye fails)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment