Skip to content

Instantly share code, notes, and snippets.

@kilian-gebhardt
Forked from dustalov/sigf.py
Created October 15, 2019 10:18
Show Gist options
  • Save kilian-gebhardt/0602c35fa34c1982dbb5d83718934773 to your computer and use it in GitHub Desktop.
Save kilian-gebhardt/0602c35fa34c1982dbb5d83718934773 to your computer and use it in GitHub Desktop.
An implementation of the sigf toolkit for randomization tests in Python 3
#!/usr/bin/env python
# This is an MIT-licensed implementation of the sigf toolkit
# for randomization tests: https://nlpado.de/~sebastian/software/sigf.shtml
from random import getrandbits
import sys
def randomized_test(model1, model2, score, trials):
    """Estimate a p-value for the score difference of two models.

    In each trial, the observations at every position where the two models
    differ are swapped between the models with probability 1/2 (one random
    bit per position), and the absolute score difference of the shuffled
    models is compared with the observed one.

    Args:
        model1: Sequence of per-item observations for the first model.
        model2: Sequence of per-item observations for the second model;
            must have the same length as ``model1``.
        score: Callable mapping a list of observations to a number.
        trials: Number of random shuffles to perform (non-negative).

    Returns:
        ``(better + 1) / (trials + 1)`` where ``better`` is the number of
        trials whose absolute score difference was at least as large as the
        observed one.

    Raises:
        ValueError: If the models differ in length or ``trials`` is negative.

    The observed scores and their absolute difference are reported on
    standard error.
    """
    # Validate once, up front, with a real exception: an ``assert`` is
    # stripped under ``python -O`` and would never run when trials == 0.
    if len(model1) != len(model2):
        raise ValueError(
            'models must have the same number of observations (%d != %d)'
            % (len(model1), len(model2)))
    if trials < 0:
        raise ValueError('trials must be non-negative, got %r' % (trials,))

    # Score each model exactly once; the scoring function may be expensive.
    score1 = score(model1)
    print('# score(model1) = %f' % score1, file=sys.stderr)
    score2 = score(model2)
    print('# score(model2) = %f' % score2, file=sys.stderr)

    diff = abs(score1 - score2)
    print('# abs(diff) = %f' % diff, file=sys.stderr)

    # Only positions where the models disagree can change the scores when
    # swapped, so the shuffle is restricted to those.
    uncommon = [i for i, (a, b) in enumerate(zip(model1, model2)) if a != b]

    better = 0
    for _ in range(trials):
        model1_local, model2_local = list(model1), list(model2)

        for i in uncommon:
            if getrandbits(1) == 1:
                model1_local[i], model2_local[i] = model2[i], model1[i]

        diff_local = abs(score(model1_local) - score(model2_local))
        if diff_local >= diff:
            better += 1

    # Add-one smoothing on both sides, as in the original formula.
    return (better + 1.) / (trials + 1.)
def input_counts(f):
    """Read one integer per line from the iterable of lines ``f``.

    Blank (whitespace-only) lines are skipped, matching the behaviour of
    ``input_tp_fp_fn``, so a trailing empty line no longer aborts parsing.

    Returns:
        A list of ints, one per non-blank line, in input order.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    stripped = (line.strip() for line in f)
    return [int(line) for line in stripped if line]
def input_tp_fp_fn(f):
    """Read triples of integer counts, one triple per line, from ``f``.

    Fields may be separated by any run of whitespace (spaces or tabs).
    Blank lines are skipped.

    Returns:
        A list of 3-tuples of ints, one per non-blank line, in input order.

    Raises:
        ValueError: If a non-blank line does not contain exactly three
            fields, or a field is not a valid integer.
    """
    result = []
    for lineno, line in enumerate(f, 1):
        # split() with no separator tolerates tabs and repeated spaces and
        # yields an empty list for blank lines.
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 3:
            # Fail here, with the line number, rather than later with an
            # IndexError inside the scoring function.
            raise ValueError(
                'line %d: expected 3 whitespace-separated counts, got %d'
                % (lineno, len(fields)))
        result.append(tuple(int(count) for count in fields))
    return result
def f1_score(model):
    """Compute the F1 score from per-item count triples.

    Each observation is indexed at positions 0, 1 and 2; the three columns
    are summed over the whole model and used as the numerator, the
    precision denominator and the recall denominator respectively.

    Returns:
        ``2 * precision * recall / (precision + recall)`` as a float, or
        ``0.`` when any of the three column totals is zero.
    """
    def column_total(k):
        # Sum the k-th field across all observations.
        return sum(row[k] for row in model)

    hits = column_total(0)
    predicted = column_total(1)
    actual = column_total(2)

    # Any zero total makes precision or recall undefined or zero.
    if 0 in (hits, predicted, actual):
        return 0.

    precision = hits / float(predicted)
    recall = hits / float(actual)
    return 2 * precision * recall / (precision + recall)
if '__main__' == __name__:
    # Command-line entry point: read two model files, run the randomization
    # test with the selected scoring function and print the p-value.
    import argparse
    from statistics import mean

    # Every element of SCORES is a pair of input reading function and
    # scoring function.
    SCORES = {
        'mean': (input_counts, mean),
        'f1': (input_tp_fp_fn, f1_score),
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--score', choices=SCORES.keys(), default='mean')
    parser.add_argument('--trials', '-n', type=int, default=10**5)
    parser.add_argument('model1', type=argparse.FileType('r'))
    parser.add_argument('model2', type=argparse.FileType('r'))
    args = parser.parse_args()

    reader, score = SCORES[args.score]

    # Close the files argparse opened as soon as they have been read.
    with args.model1, args.model2:
        model1, model2 = reader(args.model1), reader(args.model2)

    # Report bad input as a usage error instead of relying on ``assert``,
    # which is removed when Python runs with -O.
    if args.trials < 0:
        parser.error('--trials must be non-negative')
    if len(model1) != len(model2):
        parser.error(
            'model1 and model2 must contain the same number of observations '
            '(%d != %d)' % (len(model1), len(model2)))

    p = randomized_test(model1, model2, score, args.trials)
    print('p-value = %f' % p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment