Skip to content

Instantly share code, notes, and snippets.

@andreasvc
Last active December 20, 2015 22:28
Show Gist options
  • Save andreasvc/6204982 to your computer and use it in GitHub Desktop.
Save andreasvc/6204982 to your computer and use it in GitHub Desktop.
Compare different strategies for adding a large number of small log probabilities.
""" Compare different strategies for adding a large number of small log
probabilities. """
from __future__ import print_function
from math import log, exp, fsum, isinf
from random import expovariate
N = 10000
def logprobadd(x, y):
""" Add two log probabilities in log space; i.e.:
>>> a = b = 0.25
>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5)
True
:param x, y: Python floats with log probabilities; -inf <= x, y <= 0.
:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations
"""
if isinf(x):
return y
elif isinf(y):
return x
# If one value is much smaller than the other, keep the larger value.
elif x < (y - 460): # ~= log(1e200)
return y
elif y < (x - 460): # ~= log(1e200)
return x
diff = y - x
assert not isinf(diff)
if isinf(exp(diff)): # difference is too large
return x if x > y else y
# otherwise return the sum.
return x + log(1.0 + exp(diff))
def logprobsum1(logprobs):
""" Takes a list of log probabilities and sums them producing a
normal probability 0 < p <= 1.0; i.e.:
>>> a = b = c = 0.25
>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75
True
:param logprobs: a list of Python floats with log probilities,
s.t. -inf <= p <= 0 for each p in ``logprobs``.
:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers
"""
maxprob = max(logprobs)
return exp(fsum([maxprob, log(fsum([exp(prob - maxprob)
for prob in logprobs]))]))
def logprobsum2(logprobs):
""" Replace first fsum with multiplication. """
maxprob = max(logprobs)
return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs])
def logprobsum3(logprobs):
""" Replace first fsum with normal addition. """
maxprob = max(logprobs)
return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs])))
def incradd(logprobs):
""" Incremental conversion + addition """
incr = 0.0
for prob in logprobs:
incr += exp(prob)
return incr
def incrlogadd(logprobs):
""" Incremental addition in log space. """
incr = logprobs[0]
for prob in logprobs[1:]:
incr = logprobadd(incr, prob)
return exp(incr)
def compare(probs, logprobs):
""" Compare methods of adding probabilities and print a line w/results. """
ref = fsum(probs)
results = {name: ref - func(logprobs) for
name, func in FUNCS.items()}
results['sum'] = ref - sum(probs)
for a, b in sorted(results.items(), key=lambda x: abs(x[1])):
print('%5s %24r' % (a, b), end=' ')
print()
FUNCS = dict(
log1=logprobsum1,
log2=logprobsum2,
log3=logprobsum3,
inc=incradd,
inclg=incrlogadd)
def main():
"""" Run test 20 times. """
print('difference with reference, columns ordered by error')
compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1],
map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]))
for _ in range(20):
logprobs = [expovariate(-1 / 100.) for _ in range(N)]
probs = map(exp, logprobs)
compare(probs, logprobs)
if __name__ == '__main__':
main()

'log2' appears to be the most accurate; a typical result:

log2                      0.0
log3   -5.684341886080802e-14
log1   -5.684341886080802e-14
sum    7.105427357601002e-14
inc    7.105427357601002e-14
inclg    9.180212146020494e-12

Speed benchmarks:

inclg: 100 loops, best of 3: 10.9 ms per loop
inc: 1000 loops, best of 3: 1.42 ms per loop
log2: 100 loops, best of 3: 11.1 ms per loop
log3: 100 loops, best of 3: 11.1 ms per loop
log1: 100 loops, best of 3: 11.1 ms per loop

Speed of adding normal probabilities:

sum: 10000 loops, best of 3: 148 us per loop
fsum: 100 loops, best of 3: 11.4 ms per loop
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment