Last active
June 5, 2017 05:47
-
-
Save fwhigh/218a1a9582945acbcd2c196ef542f90b to your computer and use it in GitHub Desktop.
Fast and Lean Ad Hoc Binary Classifier Evaluation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Benchmark `perf -ROC` on the subsampled evaluation file.
# Three untimed runs come first with their output discarded
# (NOTE(review): presumably a warm-up so the timed runs are comparable — confirm),
# followed by ten runs measured together by the shell's `time` keyword.
eval_file=kddb.t_eval.subsample.txt

for _ in 1 2 3; do
  perf -ROC < "${eval_file}" > /dev/null
done

time for _ in {1..10}; do
  perf -ROC < "${eval_file}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Root-mean-square error between column 1 (label) and column 2 (score) of a
# space-separated evaluation file.

#######################################
# Print "RMS<TAB>value" where value = sqrt(mean((score - label)^2)).
# The mean of the squared residuals is used directly; subtracting the mean
# residual (i.e. a standard deviation) would hide any systematic bias of the
# scores and is not an RMS error.
# Arguments: $@ - input file(s) handed to awk; stdin is read when none given
# Outputs:   one line on stdout; "NaN" as the value when there are no rows
#######################################
rms_eval() {
  awk -v OFS=$'\t' '
    {
      err = $2 - $1
      sum_sq += err * err
    }
    END {
      # Guard the empty-input case instead of dividing by zero.
      print "RMS", (NR > 0 ? sqrt(sum_sq / NR) : "NaN")
    }' "$@"
}

# Default to the file used throughout this gist when no argument is given.
rms_eval "${1:-kddb.t_eval.subsample.txt}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Approximate area under the ROC curve from a space-separated evaluation file
# whose column 1 is the label (> 0 means positive) and column 2 is the score.

#######################################
# Print "ROC<TAB>auc". Scores are bucketed into 10^decimals + 1 integer bins
# (int(10^decimals * score)), then the curve is swept from the highest bin to
# the lowest.
# Arguments: $@ - input file(s) handed to awk; stdin is read when none given
# Outputs:   one line on stdout; "NaN" when either class has no rows
#######################################
auc_eval() {
  awk -v OFS=$'\t' -v decimals=3 '
    BEGIN { top = 10 ^ decimals }
    {
      bin = int(top * $2)
      # Keep out-of-range scores inside the swept bins rather than losing them.
      if (bin < 0) { bin = 0 }
      if (bin > top) { bin = top }
      if ($1 > 0) { pos[bin]++ } else { neg[bin]++ }
    }
    END {
      # Sweep every bin, including bin 0: rows with score < 10^-decimals must
      # count toward both the area and the class totals.
      for (i = top; i >= 0; i--) {
        tp = tp_prev + pos[i]
        fp = fp_prev + neg[i]
        # Trapezoid rule: rows sharing a bin are ties and earn half credit.
        area += (fp - fp_prev) * (tp + tp_prev) / 2
        tp_prev = tp
        fp_prev = fp
      }
      # After the sweep tp and fp are the total positives and negatives.
      print "ROC", (tp > 0 && fp > 0 ? area / (tp * fp) : "NaN")
    }' "$@"
}

# Default to the file used throughout this gist when no argument is given.
auc_eval "${1:-kddb.t_eval.subsample.txt}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Threshold metrics (accuracy, precision, recall, F1, lift) at a 0.5 cut-off
# from a space-separated evaluation file whose column 1 is the label (> 0 means
# positive) and column 2 is the score.

#######################################
# Print one "NAME<TAB>value" line each for ACC, PRE, REC, PRF and LFT.
# A metric whose denominator is zero is reported as "NaN" instead of aborting
# awk with a division-by-zero error.
# Arguments: $@ - input file(s) handed to awk; stdin is read when none given
# Outputs:   five lines on stdout
#######################################
confusion_eval() {
  awk -v OFS=$'\t' '
    # Safe division: "NaN" when the denominator is zero.
    function ratio(num, den) {
      return (den != 0 ? num / den : "NaN")
    }
    {
      if ($2 >= 0.5) {
        if ($1 > 0) { tp++ } else { fp++ }
      } else {
        if ($1 > 0) { fn++ } else { tn++ }
      }
    }
    END {
      flagged = tp + fp          # rows predicted positive
      total = tp + fp + tn + fn
      actual = tp + fn           # rows labelled positive

      recall = ratio(tp, actual)
      reach = ratio(flagged, total)
      # Lift is recall / reach; it needs both a defined recall and a
      # non-zero reach.
      lift = (actual > 0 && flagged > 0 ? recall / reach : "NaN")

      print "ACC", ratio(tp + tn, total)
      print "PRE", ratio(tp, flagged)
      print "REC", recall
      print "PRF", ratio(2 * tp, 2 * tp + fp + fn)
      print "LFT", lift
    }' "$@"
}

# Default to the file used throughout this gist when no argument is given.
confusion_eval "${1:-kddb.t_eval.subsample.txt}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Fetch the two bzip2-compressed kddb archives (kddb and kddb.t) from the
# LIBSVM datasets site, then decompress them in place.
base_url=http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary
archives=(kddb.bz2 kddb.t.bz2)

# Download both archives first ...
for archive in "${archives[@]}"; do
  wget "${base_url}/${archive}"
done

# ... then unpack each one (bunzip2 replaces foo.bz2 with foo).
for archive in "${archives[@]}"; do
  bunzip2 "${archive}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Build kddb.t_eval.txt: one "first-field score" pair per line, space-separated.
#   column 1: first space-delimited field of each kddb.t line
#   column 2: logistic sigmoid 1 / (1 + e^-x) of the first field of each
#             kddb.t_scores.txt line
# paste joins the two streams line by line.
data_file=kddb.t
scores_file=kddb.t_scores.txt

paste -d' ' \
  <(cut -d' ' -f 1 "${data_file}") \
  <(awk '{ print 1 / (1 + exp(-$1)) }' "${scores_file}") \
  > kddb.t_eval.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Run perf with its -ROC option, feeding the subsampled evaluation file on
# stdin.
perf -ROC < kddb.t_eval.subsample.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Run perf on the full evaluation file (read from stdin), requesting the
# -PRE, -REC, -ACC, -LFT and -PRF measures in that order.
metrics=(-PRE -REC -ACC -LFT -PRF)

perf "${metrics[@]}" < kddb.t_eval.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Run perf with its -RMS option, feeding the subsampled evaluation file on
# stdin.
eval_file=kddb.t_eval.subsample.txt

perf -RMS < "${eval_file}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Score kddb.t with the previously saved model.vw and time the whole pipeline.
#
# The awk stage rewrites each line for vw: field 1 becomes 2*label - 1
# (so 0 -> -1 and 1 -> 1) followed by a " |Features" namespace marker; the
# remaining fields pass through unchanged.
#
# -t (test only) makes vw ignore the labels for learning, so the model is not
# updated from the test rows while the predictions are being written.
time awk '{ $1 = ($1 * 2 - 1) " |Features"; print }' kddb.t \
  | vw -t --loss_function logistic --initial_regressor model.vw \
       -p kddb.t_scores.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Time a descending general-numeric sort of the subsampled evaluation file on
# its second space-separated field. The sorted output is discarded; only the
# timing is of interest.
eval_file=kddb.t_eval.subsample.txt

time sort -g -r -t ' ' -k 2,2 "${eval_file}" > /dev/null
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Draw a seeded random subsample of kddb.t_eval.txt, then evaluate it with
# perf.

#######################################
# Print a random subset of the input lines, keeping each line with
# probability target/total and stopping once `target` lines have been printed.
# Arguments: $1 - maximum number of lines to keep
#            $2 - total number of lines expected in the input
#            $3 - seed passed to awk's srand()
#            $4.. - input file(s); stdin is read when none given
# Outputs:   the selected lines on stdout
#######################################
subsample() {
  awk -v target="$1" -v total="$2" -v seed="$3" '
    BEGIN { srand(seed); keep = target / total }
    rand() < keep {
      print
      # Stop only after the target-th line has been written, so the cap is
      # exactly `target` lines rather than target - 1.
      if (++kept >= target) { exit 0 }
    }' "${@:4}"
}

subsample 500000 748401 42 kddb.t_eval.txt > kddb.t_eval.subsample.txt

# Now evaluate with perf on the subsample.
perf -PRE -REC -ACC -LFT -PRF < kddb.t_eval.subsample.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Train a logistic-loss vw model on the kddb training file and save it as
# model.vw.
#
# The awk stage rewrites each line for vw: field 1 becomes 2*label - 1
# (so 0 -> -1 and 1 -> 1) followed by a " |Features" namespace marker; the
# remaining fields pass through unchanged.
awk '{ $1 = ($1 * 2 - 1) " |Features"; print }' kddb \
  | vw --loss_function logistic --final_regressor model.vw
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment