Last active
August 1, 2023 20:30
-
-
Save Richardn2002/6f941d7ecb3d249fdd106e2c6e6aef01 to your computer and use it in GitHub Desktop.
Wilcoxon Ranksum Test in C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <cmath> | |
#include <vector> | |
using std::erfc; | |
using std::sort; | |
using std::sqrt; | |
using std::vector; | |
double calc(const vector<double> &sample1, const vector<double> &sample2) { | |
vector<double> former(sample1); | |
vector<double> latter(sample2); | |
sort(former.begin(), former.end()); | |
sort(latter.begin(), latter.end()); | |
// iterator to the current entry in former | |
auto it_former = former.begin(); | |
// iterator to the current entry in latter | |
auto it_latter = latter.begin(); | |
// continue increasing no matter of ties | |
unsigned int cnt = 0; | |
double ranksum = 0.0; | |
while (it_former != former.end()) { | |
double entry_former = *it_former; | |
// the previous value, to determine ties | |
double entry_prev = NAN; | |
// the length of current tie | |
double tie_cnt = 0; | |
double entry_latter; | |
while (it_latter != latter.end() && (entry_latter = *it_latter) <= entry_former) { | |
if (entry_latter == entry_prev) { | |
++tie_cnt; | |
} else { | |
// different from prev, resetting | |
tie_cnt = 1; | |
entry_prev = entry_latter; | |
} | |
++cnt; | |
++it_latter; | |
} | |
if (entry_prev != entry_former) { | |
// tie does not continue into former | |
tie_cnt = 1; | |
} else { | |
++tie_cnt; | |
} | |
// how many entries in the current tie are from former | |
double tie_cnt_former = 1; | |
// consume current entry | |
++cnt; | |
// deplete any possible ties | |
while ((++it_former) != former.end() && *it_former == entry_former) { | |
++tie_cnt; | |
++tie_cnt_former; | |
++cnt; | |
} | |
// ranksum += the average of all integers within [cnt - tie_cnt + 1, cnt] * tie_cnt_former | |
ranksum += (cnt - tie_cnt + 1 + cnt) * tie_cnt_former / 2.0; | |
} | |
unsigned int n1 = sample1.size(); | |
unsigned int n2 = sample2.size(); | |
// the ranksum of the latter sample is approximated as a norm dist, | |
// with parameters (mean and variance) as following: | |
double mean = (n1 * (n1 + n2 + 1)) / 2.0; | |
double var = (n1 * n2 * (n1 + n2 + 1)) / 12.0; | |
double z = (ranksum - mean - 0.5 + (ranksum < mean)) / sqrt(var); | |
double p = 1.0 - 0.5 * erfc(-z * sqrt(0.5)); | |
return p; | |
} | |
#include <iostream> | |
int main() { | |
// example samples from my "Probabilistic Methods in Engineering" lecture slides, cr. Prof. Horst Hohberger, | |
// University of Michigan - Shanghai Jiao Tong University Joint Institute | |
vector<double> small = {18.5, 12.25, 3, 15, 19.75, 11.25, 11.75, 19.25, 12.25, 19.75, 16.25, 13, 19.25, 1.75}; | |
vector<double> large = {5.5, 5.5, 12.75, 18.75, 19.25, 11.25, 11.5, 11.5, 12.25, 14.25, | |
9.25, 14.5, 13.25, 8.25, 16.75, 10.5, 6, 15.25, 6.5, 12.5, | |
10.5, 8.75, 11.5, 17, 2.75, 13.25, 19, 16.5, 11.5, 1.75}; | |
std::cout << calc(small, large) << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Forgot to apply continuity correction. Corrected.