Created
September 9, 2023 09:54
-
-
Save SimonDanisch/037f4561f08e603f1f4f092d5648019b to your computer and use it in GitHub Desktop.
Mojo vs Julia mandelbrot benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Super simple SIMD implementation for Complex Numbers | |
struct ComplexSIMD{N,T} | |
re::NTuple{N,T} | |
im::NTuple{N,T} | |
end | |
Base.abs2(z::ComplexSIMD) = z.re .* z.re .+ z.im .* z.im | |
Base.:(*)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(a.re .* b.re .- a.im .* b.im, a.re .* b.im .+ a.im .* b.re) | |
Base.:(+)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(a.re .+ b.re, a.im .+ b.im) | |
function mandelbrot_kernel(c::ComplexSIMD{N,T}) where {N,T} | |
z = c | |
mask = ntuple(k -> true, N) | |
iters = ntuple(k -> T(0), N) | |
for i in 1:200 | |
!any(mask) && return iters | |
mask = abs2(z) .<= 4.0 | |
iters = iters .+ (mask .* 1) | |
z = z * z + c | |
end | |
return iters | |
end | |
function inner_loop(result, x_range, y_range, y, ::Val{N}) where N | |
ci = y_range[y] | |
for x in 1:N:length(x_range) | |
c = ComplexSIMD(ntuple(k -> x_range[x+k-1], N), ntuple(k -> ci, N)) | |
iters = mandelbrot_kernel(c) | |
foreach(enumerate(iters)) do (k, v) | |
# Write the result to the output array | |
result[y, x+k-1] = v | |
end | |
end | |
end | |
function mandelbrot(result, x_range, y_range, N) | |
@sync for y in eachindex(y_range) | |
Threads.@spawn @inbounds begin | |
inner_loop(result, x_range, y_range, y, N) | |
end | |
end | |
return result | |
end | |
using BenchmarkTools | |
N = 960 | |
x_range = range(-2.0, 0.6, N) | |
y_range = range(-1.5, 1.5, N) | |
result = zeros(N, N) | |
@btime mandelbrot(result, x_range, y_range, Val(4)); | |
using GLMakie | |
heatmap(result, colormap=:viridis, colorscale=sqrt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from benchmark import Benchmark | |
from complex import ComplexSIMD, ComplexFloat64 | |
from math import iota | |
from python import Python | |
from runtime.llcl import num_cores, Runtime | |
from algorithm import parallelize, vectorize | |
from tensor import Tensor | |
from utils.index import Index | |
alias width = 960 | |
alias height = 960 | |
alias MAX_ITERS = 200 | |
alias min_x = -2.0 | |
alias max_x = 0.6 | |
alias min_y = -1.5 | |
alias max_y = 1.5 | |
alias float_type = DType.float64 | |
alias simd_width = simdwidthof[float_type]() | |
def show_plot(tensor: Tensor[float_type]): | |
alias scale = 10 | |
alias dpi = 64 | |
np = Python.import_module("numpy") | |
plt = Python.import_module("matplotlib.pyplot") | |
colors = Python.import_module("matplotlib.colors") | |
numpy_array = np.zeros((height, width), np.float64) | |
for row in range(height): | |
for col in range(width): | |
numpy_array.itemset((col, row), tensor[col, row]) | |
fig = plt.figure(1, [scale, scale * height // width], dpi) | |
ax = fig.add_axes([0.0, 0.0, 1.0, 1.0], False, 1) | |
light = colors.LightSource(315, 10, 0, 1, 1, 0) | |
image = light.shade( | |
numpy_array, plt.cm.hot, colors.PowerNorm(0.3), "hsv", 0, 0, 1.5 | |
) | |
plt.imshow(image) | |
plt.axis("off") | |
plt.show() | |
fn mandelbrot_kernel_SIMD[ | |
simd_width: Int | |
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]: | |
"""A vectorized implementation of the inner mandelbrot computation.""" | |
var z = ComplexSIMD[float_type, simd_width](0, 0) | |
var iters = SIMD[float_type, simd_width](0) | |
var in_set_mask: SIMD[DType.bool, simd_width] = True | |
for i in range(MAX_ITERS): | |
if not in_set_mask.reduce_or(): | |
break | |
in_set_mask = z.squared_norm() <= 4 | |
iters = in_set_mask.select(iters + 1, iters) | |
z = z.squared_add(c) | |
return iters | |
fn parallelized(): | |
let t = Tensor[float_type](height, width) | |
@parameter | |
fn worker(row: Int): | |
let scale_x = (max_x - min_x) / width | |
let scale_y = (max_y - min_y) / height | |
@parameter | |
fn compute_vector[simd_width: Int](col: Int): | |
"""Each time we oeprate on a `simd_width` vector of pixels.""" | |
let cx = min_x + (col + iota[float_type, simd_width]()) * scale_x | |
let cy = min_y + row * scale_y | |
let c = ComplexSIMD[float_type, simd_width](cx, cy) | |
t.data().simd_store[simd_width]( | |
row * width + col, mandelbrot_kernel_SIMD[simd_width](c) | |
) | |
# Vectorize the call to compute_vector where call gets a chunk of pixels. | |
vectorize[simd_width, compute_vector](width) | |
with Runtime() as rt: | |
@parameter | |
fn bench_parallel[simd_width: Int](): | |
parallelize[worker](rt, height, 5 * num_cores()) | |
alias simd_width = simdwidthof[DType.float64]() | |
let parallelized = Benchmark().run[bench_parallel[simd_width]]() / 1e6 | |
print("Parallelized:", parallelized, "ms") | |
try: | |
_ = show_plot(t) | |
except e: | |
print("failed to show plot:", e.value) | |
def main(): | |
parallelized() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment