Last active
March 20, 2025 14:23
-
-
Save AssortedFantasy/f57ebe9c2b5c71081db345a7372d6a38 to your computer and use it in GitHub Desktop.
A minimal implementation of higher dimensional slices in Zig.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Multi Dimensional Slices in Zig | |
// Sort of akin to ndarrays in Python's numpy | |
const std = @import("std"); | |
const runtime_safety = std.debug.runtime_safety; | |
const mem = std.mem; | |
const NDSliceErrors = error{ | |
InsufficientBufferSize, | |
ZeroLengthDimensionsNotSupported, | |
IndexOutOfBounds, | |
}; | |
pub const MemoryOrdering = enum { | |
/// Least Signficant dimension last: [z, y, x] where consecutive x's are contiguous | |
row_major, | |
/// Least Signficant dimension first: [z, y, x] where consecutive z's are contiguous | |
col_major, | |
}; | |
/// N Dimensional Slice over an arbitary bit of linear memory | |
/// See test for usage | |
pub fn NDSlice(comptime T: type, comptime N: comptime_int, comptime order: MemoryOrdering) type { | |
return struct { | |
const Self = @This(); | |
/// Length in each dimension {x0, x1, x2, ... xN-1} | |
shape: [N]usize, | |
/// Underlying memory used to store the individual items | |
/// Is shrunk to the required size (buffer.len will yield number of elements) | |
items: []T, | |
pub const order = order; | |
// Memory used has to be passed in. | |
pub fn init(shape: [N]usize, buffer: []T) !Self { | |
var num_items: usize = 1; | |
for (shape) |s| { | |
num_items *= s; | |
} | |
if (num_items > buffer.len) return NDSliceErrors.InsufficientBufferSize; | |
if (num_items == 0) return NDSliceErrors.ZeroLengthDimensionsNotSupported; | |
return Self{ | |
.shape = shape, | |
.items = buffer[0..num_items], | |
}; | |
} | |
/// Computes the linear index of an element | |
pub fn at(self: Self, index: [N]usize) !usize { | |
if (runtime_safety) { | |
for (index) |index_i, i| { | |
if (index_i >= self.shape[i]) return NDSliceErrors.IndexOutOfBounds; | |
} | |
} | |
return switch (order) { | |
.row_major => blk: { | |
// Linear index = ( ... ((i0*s1 + i1)*s2 + i2)*s3 + ... )*s(N-1) + i(N-1) | |
var linear_index = index[0]; | |
comptime var i = 1; | |
inline while (i < N) : (i += 1) { | |
linear_index = linear_index * self.shape[i] + index[i]; // Single fused multiply add | |
} | |
break :blk linear_index; | |
}, | |
.col_major => blk: { | |
// Linear index = i0 + s0*(i1 + s1*(i2 + s2*(...(i(N-2) + s(N-2)*i(N-1)) ... )) | |
var linear_index = index[N - 1]; | |
comptime var i = N - 2; | |
inline while (i >= 0) : (i -= 1) { | |
linear_index = linear_index * self.shape[i] + index[i]; // Single fused mutiply add | |
} | |
break :blk linear_index; | |
}, | |
}; | |
} | |
}; | |
} | |
test "Simple Slice" { | |
// This creates a 2D slice type we can use to represent images, its a MxN slice of triplets of RGB values | |
const ImageSlice = NDSlice([3]u8, 2, .row_major); | |
// This is a buffer, we need to create a buffer to put the slice on | |
var image_buffer = [_][3]u8{.{ 0, 0, 0 }} ** 30; // 6x5 image (width X height) | |
// This slice is created over that buffer. | |
const image = try ImageSlice.init(.{ 5, 6 }, &image_buffer); // By convention height is the first dimension | |
// You use .at() and .items() to access members. | |
image.items[try image.at(.{ 0, 0 })] = .{ 1, 2, 3 }; | |
image.items[try image.at(.{ 1, 1 })] = .{ 50, 50, 50 }; | |
image.items[try image.at(.{ 4, 5 })] = .{ 128, 255, 0 }; | |
image.items[try image.at(.{ 2, 4 })] = .{ 100, 12, 30 }; | |
// You can get each of the individual dimensions with .shape | |
// and for the total number of elements use .items.len | |
} |
Modified to support both types of memory ordering, also fixed some convention things.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tests are trash and don't actually test anything.
API is pretty much as bare bones as it gets.
The index is computed using an unrolled loop which consists of (N-1) FMA instructions (hopefully very fast).
It could technically be made faster by pre-computing the strides and doing the multiplication all in parallel using a vector multiply followed by an add reduce.
But this is a simpler implementation which in practice for a reasonable number of dimensions is extremely fast given FMA instructions.
The only technically redundant thing is that items could be a pointer to an unknown number of elements but then a product over the shape would need to be done any time the total number of elements is required.
Suggestions and tests welcome.