Skip to content

Instantly share code, notes, and snippets.

@samaid
Created May 14, 2023 19:53
Show Gist options
  • Save samaid/7024989bf7c834c93dd613944d3e9b34 to your computer and use it in GitHub Desktop.
Save samaid/7024989bf7c834c93dd613944d3e9b34 to your computer and use it in GitHub Desktop.
Raising to the power leads to 10x performance slowdown in double-nested loop
from numba import njit, prange
import numpy as np
import math
from time import time
SOFTENING_SQ = 0.001
G = 1.0
@njit(parallel=True)
def foo(m, pos, vel, acc):
mass = m
velocity = vel
acceleration = acc
dt = 0.01
for i in prange(len(m)):
velocity[i] += acceleration[i] * dt * 0.5 # Perform half-step with old acceleration
pos[i] += velocity[i] * dt # Update position with old velocity
for i in prange(len(m)):
x1 = pos[i]
ax = 0.0
for j in range(len(m)):
x2 = pos[j]
dx = x2 - x1
dx2 = dx * dx
r2 = dx2 + 0.1
inv_r3 = r2
# 3.626232147216797
# 2.376734972000122
# inv_r3 = math.pow(r2, -1.5)
# 38.593775510787964
# 37.70081043243408
# inv_r3 = r2 ** (-1.5)
# 38.75098156929016
# 37.54234862327576
# inv_r3 = np.power(r2, -1.5)
# 40.880536794662476
# 39.159523248672485
ax += G * dx * inv_r3 * mass[j]
acceleration[i] = ax
velocity[i] += acceleration[i] * dt * 0.5 # Perform half-step with new acceleration
return pos, velocity, acceleration
N = 100000
m = np.random.uniform(10, 100, N)
pos = np.zeros(N)
vel = np.zeros(N)
acc = np.zeros(N)
t1 = time()
pos, vel, acc = foo(m, pos, vel, acc)
t2 = time()
print(t2-t1)
t1 = time()
pos, vel, acc = foo(m, pos, vel, acc)
t2 = time()
print(t2-t1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment