maffoo · August 29, 2015 14:21
diff --git a/amdahl.png b/amdahl.png
diff --git a/amdahl.py b/amdahl.py
 """
 Will it thread?

 A simple demonstration of Amdahl's law, which quantifies the amount of speedup
 that can be obtained by parallelizing a program when only some fraction of the
 code is parallelizable (http://en.wikipedia.org/wiki/Amdahl%27s_law).

 This is of interest in python because the python interpreter has a global
 interpreter lock (GIL) which means that even "multi-threaded" python code is
 effectively single-threaded and cannot take advantage of multiple cores.
 Numpy code is an exception because many numpy routines that call into C
 libraries actually release the GIL, and so we might be able to benefit from
 multithreading, depending on the actual fraction of time spent in C code.

 We test this for a couple of different numpy functions, convolve and fft, which
 turn out to behave rather differently from each other and, in the case of fft,
 differently based on the arguments passed in as well.
 """

 from functools import partial
 import time
 import threading
 import matplotlib.pyplot as plt
 import numpy as np

 def seq(n, f, *args, **kw):
    """Time to execute a function n times in sequence"""
    start = time.time()
    for _ in xrange(n):
        f(*args, **kw)
    end = time.time()
    return end - start

 def par(n, f, *args, **kw):
    """Time to execute a function n times in parallel, each in its own thread"""
    threads = [threading.Thread(target=f, args=args, kwargs=kw)
               for _ in range(n)]
    start = time.time()
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    end = time.time()
    return end - start

 def par_speedup(n, f, *args, **kw):
    """Speedup of parallel (threaded) versus sequential execution"""
    t_par = par(n, f, *args, **kw)
    t_seq = seq(n, f, *args, **kw)
    return t_seq / t_par

 def parallelizable_fraction(n, speedup):
    """Determine the fraction of code that is parallelizable using Amdahl's law

    Given the number of parallel threads n and the speedup, returns the
    effective fraction of the code that can be parallelized.
    """
    return (1/speedup - 1) / (1/ns - 1)


 ns = np.arange(1, 11)

 # Note: these are pretty slow; we want to maximize the time spent in C code
 test_cases = {
    'convolve': partial(np.convolve, np.arange(100000), np.arange(10000)),
    'fft': partial(np.fft.fft, np.arange(10000000)),
    'fft_pow2': partial(np.fft.fft, np.arange(2**23)),
 }
 test_names = sorted(test_cases.keys())
 test_results = {name: np.array([par_speedup(n, f) for n in ns])
                for name, f in test_cases.items()}

 plt.subplot(211)
 for name in test_names:
    y = test_results[name]
    plt.plot(ns, y, '.-', label=name)
 plt.ylabel('speedup')
 plt.legend()

 plt.subplot(212)
 for name in test_names:
    y = parallelizable_fraction(ns, test_results[name])
    plt.plot(ns, y, '.-', label=name)
 plt.xlabel('parallel threads')
 plt.ylabel('parallelizable fraction')
 plt.legend()

 plt.show()
	"""
	Will it thread?

	A simple demonstration of Amdahl's law, which quantifies the amount of speedup
	that can be obtained by parallelizing a program when only some fraction of the
	code is parallelizable (http://en.wikipedia.org/wiki/Amdahl%27s_law).

	This is of interest in python because the python interpreter has a global
	interpreter lock (GIL) which means that even "multi-threaded" python code is
	effectively single-threaded and cannot take advantage of multiple cores.
	Numpy code is an exception because many numpy routines that call into C
	libraries actually release the GIL, and so we might be able to benefit from
	multithreading, depending on the actual fraction of time spent in C code.

	We test this for a couple of different numpy functions, convolve and fft, which
	turn out to behave rather differently from each other and, in the case of fft,
	differently based on the arguments passed in as well.
	"""

	from functools import partial
	import time
	import threading
	import matplotlib.pyplot as plt
	import numpy as np

	def seq(n, f, args, *kw):
	"""Time to execute a function n times in sequence"""
	start = time.time()
	for _ in xrange(n):
	f(args, *kw)
	end = time.time()
	return end - start

	def par(n, f, args, *kw):
	"""Time to execute a function n times in parallel, each in its own thread"""
	threads = [threading.Thread(target=f, args=args, kwargs=kw)
	for _ in range(n)]
	start = time.time()
	for t in threads:
	t.start()
	for t in threads:
	t.join()
	end = time.time()
	return end - start

	def par_speedup(n, f, args, *kw):
	"""Speedup of parallel (threaded) versus sequential execution"""
	t_par = par(n, f, args, *kw)
	t_seq = seq(n, f, args, *kw)
	return t_seq / t_par

	def parallelizable_fraction(n, speedup):
	"""Determine the fraction of code that is parallelizable using Amdahl's law

	Given the number of parallel threads n and the speedup, returns the
	effective fraction of the code that can be parallelized.
	"""
	return (1/speedup - 1) / (1/ns - 1)


	ns = np.arange(1, 11)

	# Note: these are pretty slow; we want to maximize the time spent in C code
	test_cases = {
	'convolve': partial(np.convolve, np.arange(100000), np.arange(10000)),
	'fft': partial(np.fft.fft, np.arange(10000000)),
	'fft_pow2': partial(np.fft.fft, np.arange(2**23)),
	}
	test_names = sorted(test_cases.keys())
	test_results = {name: np.array([par_speedup(n, f) for n in ns])
	for name, f in test_cases.items()}

	plt.subplot(211)
	for name in test_names:
	y = test_results[name]
	plt.plot(ns, y, '.-', label=name)
	plt.ylabel('speedup')
	plt.legend()

	plt.subplot(212)
	for name in test_names:
	y = parallelizable_fraction(ns, test_results[name])
	plt.plot(ns, y, '.-', label=name)
	plt.xlabel('parallel threads')
	plt.ylabel('parallelizable fraction')
	plt.legend()

	plt.show()