Created
May 21, 2013 18:48
-
-
Save timm/5622240 to your computer and use it in GitHub Desktop.
Non-parametric hypothesis testing using Vargha and Delaney's A12 statistic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; The A12 non-parametric test | |
; Tim Menzies, (c) 2013, [email protected] | |
; (c) http://creativecommons.org/licenses/by/3.0/ | |
; | |
; The Vargha and Delaney's A12 statistics is a non-parametric effect | |
; size measure. Reference: + A. Vargha and H. D. Delaney. A critique | |
; and improvement of the CL common language effect size statistics of | |
; McGraw and Wong. Journal of Educational and Behavioral Statistics, | |
; 25(2):101-132, 2000 | |
; | |
; Given a performance measure M seen in m measures of X and n measures | |
; of Y, the A12 statistics measures the probability that running | |
; algorithm X yields higher M values than running another algorithm Y. | |
; | |
; A12 = #(X > Y)/mn + 0.5*#(X=Y)/mn | |
; | |
; According to Vargha and Delaney, a small, medium, large difference | |
; between two populations: | |
; | |
; + Big is A12 over 0.71 | |
; + Medium is A12 over 0.64 | |
; + Small is A12 over 0.56 | |
; | |
; In my view, this seems gratitiously different to... | |
; | |
; + Big is A12 over three-quarters (0.75) | |
; + Medium is A12 over two-thirds (0.66) | |
; + Small is A12 over half (0.5) | |
; | |
; Whatever, the following code parameterizes that magic number | |
; so you can use the standard values if you want to. | |
; | |
; While A12 studies two treatments. LA12 handles multiple treatments. | |
; Samples from each population are sorted by their mean. Then | |
; b4= sample[i] and after= sample[i+1] and rank(after) = 1+rank(b4) | |
; if a12 reports that the two populations are different. | |
; | |
; To simplify that process, I offer the following syntax. A population | |
; is a list of numbers, which may be unsorted, and starts with some | |
; symbol or string describing the population. A12s expects a list of | |
; such populations. For examples of that syntax, see these functions: | |
(defun demo () | |
(_a12sa) | |
(_a12sb) | |
nil) | |
(defun _a12sa () | |
(terpri) | |
(a12s '((x1 0.34 0.49 0.51 0.60) | |
(x2 0.6 0.7 0.8 0.90) | |
(x3 0.15 0.25 0.4 0.35) | |
(x4 0.6 0.7 0.8 0.90) | |
(x5 0.1 0.2 0.3 0.40))))) | |
; If this code works, you should see... | |
; | |
; ((RANK 1 RX X2 MEAN 0.75 POP (0.9 0.8 0.7 0.6)) | |
; (RANK 1 RX X4 MEAN 0.75 POP (0.9 0.8 0.7 0.6)) | |
; (RANK 2 RX X1 MEAN 0.485 POP (0.6 0.51 0.49 0.34)) | |
; (RANK 3 RX X3 MEAN 0.2875 POP (0.4 0.35 0.25 0.15)) | |
; (RANK 3 RX X5 MEAN 0.25 POP (0.4 0.3 0.2 0.1))) | |
; | |
; i.e. our treatments are sorted in the order x2, x4, x1, x3, x5 | |
; and the first two and the last two get the same ranks. | |
(defun _a12sb () | |
(terpri) | |
(a12s '((y1 101 100 99 101 99.5) | |
(y2 101 100 99 101 100.0) | |
(y3 101 100 99.5 101 99.0) | |
(y4 101 100 99 101 100.0))))) | |
; If this code works, you should see... | |
; | |
; ((RANK 1 RX Y2 MEAN 100.2 POP (101 101 100 100.0 99)) | |
; (RANK 1 RX Y4 MEAN 100.2 POP (101 101 100 100.0 99)) | |
; (RANK 1 RX Y1 MEAN 100.1 POP (101 101 100 99.5 99)) | |
; (RANK 1 RX Y3 MEAN 100.1 POP (101 101 100 99.5 99.0))) | |
; | |
; i.e. all the above are so similar that they all get the same rank. | |
; | |
; Finally, the linear search of LA12S is not optimal. A better way | |
; would be a iterative partitioning approach that finds the split in | |
; the treatments that maximizes the expected value of the differences | |
; between the mean of the splits and mean of the original population. | |
; This is called the Scott-Knott test and is discussed in detail on | |
; Nikolaos Mittas, Lefteris Angelis: Ranking and Clustering Software | |
; Cost Estimation Models through a Multiple Comparisons Algorithm. | |
; IEEE Trans. Software Eng. 39(4): 537-551 (2013). | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defun largeDifference (pop1 pop2) (bigger pop1 pop2 0.75)) | |
(defun mediumDifference (pop1 pop2) (bigger pop1 pop2 0.66)) | |
(defun smallDifference (pop1 pop2) (bigger pop1 pop2 0.50)) | |
(defun bigger (pop1 pop2 &optional (enough 0.66)) | |
(let ((n (a12 pop1 pop2))) | |
(values (> n enough) | |
n))) | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; data structure stuf | |
(defstruct rx | |
list | |
name | |
(more 0) | |
(same 0) | |
(rank 1) | |
mean) | |
(defun rx! (lst &optional (> #'>) name) | |
(let ((out (make-rx :name name)) | |
(sorted (sort lst >))) | |
(setf (rx-list out) sorted | |
(rx-mean out) (mean sorted)) | |
out)) | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; Top-level driver | |
(defun a12s(pops &optional (> #'>) (enough 0.66)) | |
"Given n populations, sort each by their mean | |
then rank i compared to i+1" | |
(labels ((show (rx) | |
`(rank ,(rx-rank rx) | |
rx ,(rx-name rx) | |
mean ,(rx-mean rx) | |
pop ,(rx-list rx)))) | |
(let* ((rxs (sort | |
(mapcar #'(lambda (lst) | |
(rx! (cdr lst) > (car lst))) pops) | |
> :key #'rx-mean)) | |
(rank 1) | |
out | |
(b4 (car rxs))) | |
(setf (rx-rank b4) rank) | |
(push (show b4) out) | |
(dolist (after (cdr rxs) (reverse out)) | |
(incf rank (if (bigger | |
(rx-list b4) | |
(rx-list after) | |
enough) 1 0)) | |
(setf (rx-rank after) rank) | |
(push (show after) out) | |
(setf b4 after))))) | |
;; | |
; the guts of the process | |
(defun a12 (pop1 pop2 &optional (> #'>)) | |
(let ((m (length pop1)) | |
(n (length pop2)) | |
(one (rx! pop1 >)) | |
(two (rx! pop2 >))) | |
(labels ((stop () (or (null (rx-list one)) | |
(null (rx-list two)))) | |
(head (x) (car (rx-list x))) | |
(size (x) (length (rx-list x))) | |
(next! (x) (pop (rx-list x)) x) | |
(more! (x n) (incf (rx-more x) n)) | |
(same! (x) (incf (rx-same x))) | |
(walk (l1 l2) | |
(unless (stop) | |
(cond ((funcall > (head l1) (head l2)) | |
(more! l1 (size l2)) | |
(walk (next! l1) l2)) | |
((= (head l1) (head l2)) | |
(same! l1) | |
(same! l2) | |
(walk l1 (next! l2))) | |
(t | |
(walk l2 l1)))))) | |
(walk one two) | |
(+ (/ (rx-more one) (* m n)) | |
(/ (* 0.5 (rx-same one)) (* m n)))))) | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;xs | |
; standard stuff | |
(defun mean (lst) | |
(float (/ (reduce #'+ lst) | |
(length lst)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment