Created
February 26, 2019 10:53
-
-
Save mikaem/e093894fba07234c01f23bb9176830b4 to your computer and use it in GitHub Desktop.
Test the speed of the subclassed NumPy array DistributedArray compared to a pure NumPy array
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from mpi4py import MPI
from mpi4py_fft import PFFT, DistributedArray, newDarray, Function

# Number of leading tensor axes; each one adds an axis of length 3 to M below.
rank = 1
# Global grid shape handed to PFFT.
N = (32, 32, 32)
FFT = PFFT(MPI.COMM_WORLD, N)
# Shape used for the plain NumPy arrays: (3, 32, 32, 32) when rank == 1.
# NOTE(review): this equals the shape of the newDarray arrays only if the
# local (per-process) shape equals N, i.e. on a single process -- confirm.
M = (3,)*rank + N

# Create 3 versions of array
#   *0 : array returned by newDarray
#   *1 : result of calling __array__() on the *0 array
#        (presumably a plain ndarray view sharing memory with *0 -- confirm)
#   *2 : freshly allocated plain NumPy array of shape M
# NOTE(review): the second positional argument (False) to newDarray is
# presumably the forward_output flag -- confirm against mpi4py_fft.
a0 = newDarray(FFT, False, val=0, rank=rank)
a1 = a0.__array__()
a2 = np.zeros(M)

b0 = newDarray(FFT, False, val=1, rank=rank)
b1 = b0.__array__()
b2 = np.ones(M)

c0 = newDarray(FFT, False, val=1, rank=rank)
c1 = c0.__array__()
c2 = np.ones(M)
def add(a, b, c):
    """Accumulate ``b*c`` into ``a`` and return the result.

    The update uses augmented assignment (``a += b*c``), so objects that
    implement in-place addition (e.g. NumPy arrays) are modified in place
    and the very same object is returned.

    Parameters
    ----------
    a : object supporting ``+=``
        Accumulator.
    b, c : objects supporting ``*``
        Factors whose product is added to ``a``.

    Returns
    -------
    The updated ``a``.
    """
    a += b*c
    return a
def add2(a, b, c):
    """Accumulate ``2.0*b*c`` into ``a`` and return the result.

    Same as ``a += b*c`` except for the extra leading constant ``2.0``;
    the product is evaluated left to right as ``(2.0*b)*c``. The update
    uses augmented assignment, so objects that implement in-place addition
    (e.g. NumPy arrays) are modified in place and returned.

    Parameters
    ----------
    a : object supporting ``+=``
        Accumulator.
    b, c : objects supporting ``*``
        Factors; ``b`` must support multiplication by a float.

    Returns
    -------
    The updated ``a``.
    """
    a += 2.0*b*c
    return a
if __name__ == '__main__':
    import timeit

    # Each entry is (printed label, name of the function to time, operands).
    # An operand that is a letter is the stem of a module-level array name
    # and gets the flavour suffix appended; anything else is passed through
    # verbatim as a literal.
    cases = (
        # I see no penalty for the add function
        ("a += 1*1", "add", ("1", "1")),
        ("a += b*1", "add", ("b", "1")),
        ("a += b*c", "add", ("b", "c")),
        # However, adding an extra constant 2.0 in front and the
        # DistributedArray approach sees a significant penalty
        # in the last two calls. Why?
        ("a += 2.0*1*1", "add2", ("1", "1")),
        ("a += 2.0*b*1", "add2", ("b", "1")),
        ("a += 2.0*b*c", "add2", ("b", "c")),
    )

    for label, func, operands in cases:
        print(label)
        # Suffix 0, 1, 2 selects the module-level array set: the newDarray
        # result, its __array__() result, and the plain NumPy array.
        for flavour in "012":
            target = "a" + flavour
            args = [op + flavour if op.isalpha() else op for op in operands]
            # Only the function, the accumulator and array operands need to
            # be imported into the timeit namespace; literals do not.
            names = [func, target] + [arg for arg in args if arg[0].isalpha()]
            stmt = "{0}={1}({0}, {2})".format(target, func, ", ".join(args))
            setup = "from __main__ import " + ", ".join(names)
            print(timeit.timeit(stmt, number=1000, setup=setup))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Running this on Ubuntu gives me only minor differences up to the last two calls, where the difference is significant.
If I make the array smaller (16, 16, 16), then the penalty is seen for all tests: