An implementation of the sigf toolkit for randomization tests in Python 3
(gist kilian-gebhardt/0602c35fa34c1982dbb5d83718934773).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This is an MIT-licensed implementation of the sigf toolkit | |
# for randomization tests: https://nlpado.de/~sebastian/software/sigf.shtml | |
from random import getrandbits | |
import sys | |
def randomized_test(model1, model2, score, trials):
    """Approximate randomization test for the difference between two models.

    model1, model2 -- equal-length sequences of paired per-item observations
    score          -- function mapping such a sequence to a numeric score
    trials         -- number of random shuffles to sample

    Returns the estimated p-value: the probability, under random swapping of
    the differing observations, of seeing a score difference at least as
    large as the observed one.  Diagnostics are printed to stderr.
    """
    # Hoist the two score evaluations: the original computed each twice
    # (once for the diagnostic print, once for the difference).
    score1, score2 = score(model1), score(model2)
    print('# score(model1) = %f' % score1, file=sys.stderr)
    print('# score(model2) = %f' % score2, file=sys.stderr)
    diff = abs(score1 - score2)
    print('# abs(diff) = %f' % diff, file=sys.stderr)
    # Only positions where the two models disagree can change the statistic.
    uncommon = [i for i, (a, b) in enumerate(zip(model1, model2)) if a != b]
    better = 0
    for _ in range(trials):
        model1_local, model2_local = list(model1), list(model2)
        # Swap each differing observation between the models with prob. 1/2.
        for i in uncommon:
            if getrandbits(1) == 1:
                model1_local[i], model2_local[i] = model2[i], model1[i]
        diff_local = abs(score(model1_local) - score(model2_local))
        if diff_local >= diff:
            better += 1
    # Add-one smoothing keeps the p-value strictly positive, as is standard
    # for Monte Carlo permutation tests.
    p = (better + 1.) / (trials + 1.)
    return p
def input_counts(f):
    """Parse an open text file containing one integer count per line."""
    return list(map(int, (line.strip() for line in f)))
def input_tp_fp_fn(f):
    """Read one whitespace-separated (tp, tp+fp, tp+fn) integer triple per
    line of the open file *f*, skipping blank lines.

    Returns a list of tuples of ints.  Fields may now be separated by any
    run of whitespace (tabs, repeated spaces); the original required exactly
    single spaces because it used split(' ', 2).
    """
    result = []
    for line in f:
        line = line.strip()
        if not line:
            continue  # ignore blank lines
        result.append(tuple(int(count) for count in line.split()))
    return result
def f1_score(model):
    """Micro-averaged F1 over a list of (tp, tp+fp, tp+fn) observations.

    Returns 0.0 when any of the three totals is zero (undefined precision,
    recall, or F1).
    """
    true_pos = sum(obs[0] for obs in model)
    predicted = sum(obs[1] for obs in model)   # tp + fp total
    gold = sum(obs[2] for obs in model)        # tp + fn total
    if 0 in (true_pos, predicted, gold):
        return 0.
    precision = true_pos / float(predicted)
    recall = true_pos / float(gold)
    return 2 * precision * recall / (precision + recall)
if '__main__' == __name__:
    import argparse
    from statistics import mean
    # Every element of SCORES is a pair of input reading function and
    # scoring function.
    SCORES = {
        'mean': (input_counts, mean),
        'f1': (input_tp_fp_fn, f1_score)
    }
    parser = argparse.ArgumentParser()
    parser.add_argument('--score', choices=SCORES.keys(), default='mean')
    parser.add_argument('--trials', '-n', type=int, default=10**5)
    parser.add_argument('model1', type=argparse.FileType('r'))
    parser.add_argument('model2', type=argparse.FileType('r'))
    args = parser.parse_args()
    reader, score = SCORES[args.score]
    model1, model2 = reader(args.model1), reader(args.model2)
    # Validate with an explicit, user-visible error instead of `assert`,
    # which is silently stripped when Python runs with -O.
    if len(model1) != len(model2):
        parser.error('model files must contain the same number of '
                     'observations (%d vs. %d)' % (len(model1), len(model2)))
    p = randomized_test(model1, model2, score, args.trials)
    print('p-value = %f' % p)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment