andreasvc · December 20, 2015 22:28
diff --git a/logprobs.py b/logprobs.py
 """ Compare different strategies for adding a large number of small log
 probabilities. """
 from __future__ import print_function
 from math import log, exp, fsum, isinf
 from random import expovariate

 N = 10000


 def logprobadd(x, y):
 	""" Add two log probabilities in log space; i.e.:

 	>>> a = b = 0.25
 	>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5)
 	True

 	:param x, y: Python floats with log probabilities; -inf <= x, y <= 0.
 	:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations
 	"""
 	if isinf(x):
 		return y
 	elif isinf(y):
 		return x
 	# If one value is much smaller than the other, keep the larger value.
 	elif x < (y - 460):  # ~= log(1e200)
 		return y
 	elif y < (x - 460):  # ~= log(1e200)
 		return x
 	diff = y - x
 	assert not isinf(diff)
 	if isinf(exp(diff)):    # difference is too large
 		return x if x > y else y
 	# otherwise return the sum.
 	return x + log(1.0 + exp(diff))


 def logprobsum1(logprobs):
 	""" Takes a list of log probabilities and sums them producing a
 	normal probability 0 < p <= 1.0; i.e.:

 	>>> a = b = c = 0.25
 	>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75
 	True

 	:param logprobs: a list of Python floats with log probilities,
 		s.t. -inf <= p <= 0 for each p in ``logprobs``.
 	:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers
 	"""
 	maxprob = max(logprobs)
 	return exp(fsum([maxprob, log(fsum([exp(prob - maxprob)
 			for prob in logprobs]))]))


 def logprobsum2(logprobs):
 	""" Replace first fsum with multiplication. """
 	maxprob = max(logprobs)
 	return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs])


 def logprobsum3(logprobs):
 	""" Replace first fsum with normal addition. """
 	maxprob = max(logprobs)
 	return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs])))


 def incradd(logprobs):
 	""" Incremental conversion + addition  """
 	incr = 0.0
 	for prob in logprobs:
 		incr += exp(prob)
 	return incr


 def incrlogadd(logprobs):
 	""" Incremental addition in log space. """
 	incr = logprobs[0]
 	for prob in logprobs[1:]:
 		incr = logprobadd(incr, prob)
 	return exp(incr)


 def compare(probs, logprobs):
 	""" Compare methods of adding probabilities and print a line w/results. """
 	ref = fsum(probs)
 	results = {name: ref - func(logprobs) for
 			name, func in FUNCS.items()}
 	results['sum'] = ref - sum(probs)
 	for a, b in sorted(results.items(), key=lambda x: abs(x[1])):
 		print('%5s %24r' % (a, b), end=' ')
 	print()


 FUNCS = dict(
 		log1=logprobsum1,
 		log2=logprobsum2,
 		log3=logprobsum3,
 		inc=incradd,
 		inclg=incrlogadd)


 def main():
 	"""" Run test 20 times. """
 	print('difference with reference, columns ordered by error')
 	compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1],
 			map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]))
 	for _ in range(20):
 		logprobs = [expovariate(-1 / 100.) for _ in range(N)]
 		probs = map(exp, logprobs)
 		compare(probs, logprobs)

 if __name__ == '__main__':
 	main()
diff --git a/results.md b/results.md
	""" Compare different strategies for adding a large number of small log
	probabilities. """
	from __future__ import print_function
	from math import log, exp, fsum, isinf
	from random import expovariate

	N = 10000


	def logprobadd(x, y):
	""" Add two log probabilities in log space; i.e.:

	>>> a = b = 0.25
	>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5)
	True

	:param x, y: Python floats with log probabilities; -inf <= x, y <= 0.
	:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations
	"""
	if isinf(x):
	return y
	elif isinf(y):
	return x
	# If one value is much smaller than the other, keep the larger value.
	elif x < (y - 460): # ~= log(1e200)
	return y
	elif y < (x - 460): # ~= log(1e200)
	return x
	diff = y - x
	assert not isinf(diff)
	if isinf(exp(diff)): # difference is too large
	return x if x > y else y
	# otherwise return the sum.
	return x + log(1.0 + exp(diff))


	def logprobsum1(logprobs):
	""" Takes a list of log probabilities and sums them producing a
	normal probability 0 < p <= 1.0; i.e.:

	>>> a = b = c = 0.25
	>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75
	True

	:param logprobs: a list of Python floats with log probilities,
	s.t. -inf <= p <= 0 for each p in ``logprobs``.
	:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers
	"""
	maxprob = max(logprobs)
	return exp(fsum([maxprob, log(fsum([exp(prob - maxprob)
	for prob in logprobs]))]))


	def logprobsum2(logprobs):
	""" Replace first fsum with multiplication. """
	maxprob = max(logprobs)
	return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs])


	def logprobsum3(logprobs):
	""" Replace first fsum with normal addition. """
	maxprob = max(logprobs)
	return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs])))


	def incradd(logprobs):
	""" Incremental conversion + addition """
	incr = 0.0
	for prob in logprobs:
	incr += exp(prob)
	return incr


	def incrlogadd(logprobs):
	""" Incremental addition in log space. """
	incr = logprobs[0]
	for prob in logprobs[1:]:
	incr = logprobadd(incr, prob)
	return exp(incr)


	def compare(probs, logprobs):
	""" Compare methods of adding probabilities and print a line w/results. """
	ref = fsum(probs)
	results = {name: ref - func(logprobs) for
	name, func in FUNCS.items()}
	results['sum'] = ref - sum(probs)
	for a, b in sorted(results.items(), key=lambda x: abs(x[1])):
	print('%5s %24r' % (a, b), end=' ')
	print()


	FUNCS = dict(
	log1=logprobsum1,
	log2=logprobsum2,
	log3=logprobsum3,
	inc=incradd,
	inclg=incrlogadd)


	def main():
	"""" Run test 20 times. """
	print('difference with reference, columns ordered by error')
	compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1],
	map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]))
	for _ in range(20):
	logprobs = [expovariate(-1 / 100.) for _ in range(N)]
	probs = map(exp, logprobs)
	compare(probs, logprobs)

	if __name__ == '__main__':
	main()