Last active
October 10, 2024 17:25
-
-
Save hughdbrown/48719b8a64991719cffe9458011858a2 to your computer and use it in GitHub Desktop.
Test various strategies for creating arrays of random numbers, some with serial dependencies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["NUMBA_LOOP_VECTORIZE"] = "0" | |
os.environ["NUMBA_SLP_VECTORIZE"] = "0" | |
from datetime import datetime | |
from numba import jit, uint32, uint64 | |
import numpy as np | |
@jit("uint32(uint32)") | |
def lcg(seed): | |
temp = uint64(seed) * 2862933555777941757 + 3037000493 | |
return temp >> 32 | |
@jit | |
def generate_random_numbers(n): | |
result = np.empty((n,), dtype=np.uint32) | |
result[0] = uint32(1) | |
for i in range(1, n): | |
# ๐ result[i] has a data dependency on result[i - 1], preventing | |
# instruction-level parallelism: | |
result[i] = lcg(result[i - 1]) | |
return result | |
def to_image(f): | |
return f(256 * 256 // 4).view(np.uint8).reshape((256, 256)) | |
@jit | |
def generate_not_so_random_numbers(n): | |
result = np.empty((n,), dtype=np.uint32) | |
for i in range(1, n): | |
# ๐ค As a temporary experiment, remove all data dependencies that are | |
# preventing ILP, allowing us to see how much faster we can go: | |
result[i] = lcg(i) | |
return result | |
@jit | |
def generate_random_numbers_3(n): | |
result = np.empty((n,), dtype=np.uint32) | |
# Make sure we have 4 sufficiently different starting points: | |
result[0] = uint32(1) | |
result[1] = lcg(lcg(result[0]) + 1) | |
result[2] = lcg(lcg(result[1]) + 1) | |
result[3] = lcg(lcg(result[2]) + 1) | |
# ๐ Do 4 unrelated calculations that don't share data dependencies with | |
# each other, allowing the CPU to run them in parallel: | |
for i in range(1, n // 4): | |
result[i * 4 ] = lcg(result[(i - 1) * 4 ]) | |
result[i * 4 + 1] = lcg(result[(i - 1) * 4 + 1]) | |
result[i * 4 + 2] = lcg(result[(i - 1) * 4 + 2]) | |
result[i * 4 + 3] = lcg(result[(i - 1) * 4 + 3]) | |
# Calculate the remaining last few values: | |
for i in range(n % 4): | |
result[n - (3 - i)] = lcg(result[n - (3 - i) - 1]) | |
return result | |
@jit | |
def generate_random_numbers_4(n): | |
result = np.empty((n,), dtype=np.uint32) | |
# ๐ Use simple variables to make it faster to access the previous value: | |
random_number1 = uint32(1) | |
random_number2 = lcg(lcg(random_number1) + 1) | |
random_number3 = lcg(lcg(random_number2) + 1) | |
random_number4 = lcg(lcg(random_number3) + 1) | |
for i in range(n // 4): | |
random_number1 = lcg(random_number1) | |
random_number2 = lcg(random_number2) | |
random_number3 = lcg(random_number3) | |
random_number4 = lcg(random_number4) | |
# ๐ Less math, no more array reads: | |
result[i * 4 ] = random_number1 | |
result[i * 4 + 1] = random_number2 | |
result[i * 4 + 2] = random_number3 | |
result[i * 4 + 3] = random_number4 | |
for i in range(n % 4): | |
result[n - (3 - i)] = lcg(result[n - (3 - i) - 1]) | |
return result | |
def main(): | |
fns = [ | |
generate_random_numbers, | |
generate_not_so_random_numbers, | |
generate_random_numbers_3, | |
generate_random_numbers_4, | |
] | |
for fn in fns: | |
print(f"{'-' * 30} {fn.__name__}") | |
for _ in range(10): | |
s = datetime.now() | |
to_image(fn) | |
e = datetime.now() | |
print(f"Elapsed {e - s}") | |
if __name__ == '__main__': | |
main() |
This is one of at least two comments that will result in me moving whole chapters around ๐
So those final results match the text I think? First is slowest, second is fast (but wrong), third is correct (but in the middle speedwise), fourth is fast again.
Yes, much more like what the text says.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
With multiple iterations, to warm up the code generation:
~/Downloads โฏ python3 ~/scratch-code/random_number.py 5s ๎ผ scratch-3.12.0 3.12.0 11:20:57
i------------------------------ generate_random_numbers
Elapsed 0:00:00.060160
Elapsed 0:00:00.000025
Elapsed 0:00:00.000024
Elapsed 0:00:00.000021
Elapsed 0:00:00.000024
Elapsed 0:00:00.000024
Elapsed 0:00:00.000019
Elapsed 0:00:00.000019
Elapsed 0:00:00.000025
Elapsed 0:00:00.000024
i------------------------------ generate_not_so_random_numbers
Elapsed 0:00:00.025977
Elapsed 0:00:00.000010
Elapsed 0:00:00.000007
Elapsed 0:00:00.000008
Elapsed 0:00:00.000008
Elapsed 0:00:00.000008
Elapsed 0:00:00.000007
Elapsed 0:00:00.000007
Elapsed 0:00:00.000006
Elapsed 0:00:00.000006
i------------------------------ generate_random_numbers_3
Elapsed 0:00:00.061300
Elapsed 0:00:00.000018
Elapsed 0:00:00.000015
Elapsed 0:00:00.000017
Elapsed 0:00:00.000013
Elapsed 0:00:00.000012
Elapsed 0:00:00.000017
Elapsed 0:00:00.000016
Elapsed 0:00:00.000017
Elapsed 0:00:00.000015
i------------------------------ generate_random_numbers_4
Elapsed 0:00:00.057804
Elapsed 0:00:00.000010
Elapsed 0:00:00.000007
Elapsed 0:00:00.000008
Elapsed 0:00:00.000007
Elapsed 0:00:00.000007
Elapsed 0:00:00.000006
Elapsed 0:00:00.000006
Elapsed 0:00:00.000006
Elapsed 0:00:00.000006