|
""" Compare different strategies for adding a large number of small log |
|
probabilities. """ |
|
from __future__ import print_function |
|
from math import log, exp, fsum, isinf |
|
from random import expovariate |
|
|
|
N = 10000 |
|
|
|
|
|
def logprobadd(x, y): |
|
""" Add two log probabilities in log space; i.e.: |
|
|
|
>>> a = b = 0.25 |
|
>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5) |
|
True |
|
|
|
:param x, y: Python floats with log probabilities; -inf <= x, y <= 0. |
|
:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations |
|
""" |
|
if isinf(x): |
|
return y |
|
elif isinf(y): |
|
return x |
|
# If one value is much smaller than the other, keep the larger value. |
|
elif x < (y - 460): # ~= log(1e200) |
|
return y |
|
elif y < (x - 460): # ~= log(1e200) |
|
return x |
|
diff = y - x |
|
assert not isinf(diff) |
|
if isinf(exp(diff)): # difference is too large |
|
return x if x > y else y |
|
# otherwise return the sum. |
|
return x + log(1.0 + exp(diff)) |
|
|
|
|
|
def logprobsum1(logprobs): |
|
""" Takes a list of log probabilities and sums them producing a |
|
normal probability 0 < p <= 1.0; i.e.: |
|
|
|
>>> a = b = c = 0.25 |
|
>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75 |
|
True |
|
|
|
:param logprobs: a list of Python floats with log probilities, |
|
s.t. -inf <= p <= 0 for each p in ``logprobs``. |
|
:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers |
|
""" |
|
maxprob = max(logprobs) |
|
return exp(fsum([maxprob, log(fsum([exp(prob - maxprob) |
|
for prob in logprobs]))])) |
|
|
|
|
|
def logprobsum2(logprobs): |
|
""" Replace first fsum with multiplication. """ |
|
maxprob = max(logprobs) |
|
return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs]) |
|
|
|
|
|
def logprobsum3(logprobs): |
|
""" Replace first fsum with normal addition. """ |
|
maxprob = max(logprobs) |
|
return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs]))) |
|
|
|
|
|
def incradd(logprobs): |
|
""" Incremental conversion + addition """ |
|
incr = 0.0 |
|
for prob in logprobs: |
|
incr += exp(prob) |
|
return incr |
|
|
|
|
|
def incrlogadd(logprobs): |
|
""" Incremental addition in log space. """ |
|
incr = logprobs[0] |
|
for prob in logprobs[1:]: |
|
incr = logprobadd(incr, prob) |
|
return exp(incr) |
|
|
|
|
|
def compare(probs, logprobs): |
|
""" Compare methods of adding probabilities and print a line w/results. """ |
|
ref = fsum(probs) |
|
results = {name: ref - func(logprobs) for |
|
name, func in FUNCS.items()} |
|
results['sum'] = ref - sum(probs) |
|
for a, b in sorted(results.items(), key=lambda x: abs(x[1])): |
|
print('%5s %24r' % (a, b), end=' ') |
|
print() |
|
|
|
|
|
FUNCS = dict( |
|
log1=logprobsum1, |
|
log2=logprobsum2, |
|
log3=logprobsum3, |
|
inc=incradd, |
|
inclg=incrlogadd) |
|
|
|
|
|
def main(): |
|
"""" Run test 20 times. """ |
|
print('difference with reference, columns ordered by error') |
|
compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1], |
|
map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1])) |
|
for _ in range(20): |
|
logprobs = [expovariate(-1 / 100.) for _ in range(N)] |
|
probs = map(exp, logprobs) |
|
compare(probs, logprobs) |
|
|
|
if __name__ == '__main__': |
|
main() |