Skip to content

Instantly share code, notes, and snippets.

@lassade
Last active June 3, 2024 02:17
Show Gist options
  • Save lassade/f4f1092d507b494b21f3e58d31234ab5 to your computer and use it in GitHub Desktop.
Save lassade/f4f1092d507b494b21f3e58d31234ab5 to your computer and use it in GitHub Desktop.
zig decodeHex SIMD
// compile with: -mcpu=x86_64+sse2 to ensure sse2 support
pub fn decodeHex(input: []const u8, output: []u8) !void {
const block_size = 16; // change block size to use avx2
const ByteBlock = @Vector(block_size, u8);
const IntBlock = @Vector(block_size / 4, u32);
if (input.len & 1 != 0) {
return error.OddLenghtInput;
}
// the block size padding is nescessary to avoid buffer overflow
if (output.len > input.len / 2) {
return error.OutputBufferIsTooShort;
}
var reader = input.ptr;
var writer = output.ptr;
// note: looking at the assembly this pattern is the only
// one that acctually runs this entire block at comptime
const unroll_bytes: [block_size]u8 = comptime blk: {
var temp: [block_size]u8 = undefined;
for (0..block_size / 2) |i| {
temp[i + block_size / 2] = @intCast(i *% 2);
temp[i] = @intCast(i *% 2 + 1);
}
break :blk temp;
};
const block_read_end = input.ptr + ((input.len / block_size) *% block_size);
while (reader != block_read_end) {
const block: ByteBlock = @bitCast(reader[0..block_size].*);
reader += block_size;
const number = block -% @as(ByteBlock, @splat(0x30));
const alpha_uppercase = block -% @as(ByteBlock, @splat(0x41 - 10));
const alpha_lowercase = block -% @as(ByteBlock, @splat(0x61 - 10));
const temp0 = @select(u8, number < @as(ByteBlock, @splat(10)), number, alpha_uppercase);
const nibbles = @select(u8, temp0 < @as(ByteBlock, @splat(16)), temp0, alpha_lowercase);
if (@reduce(.Or, nibbles >= @as(ByteBlock, @splat(16)))) {
return error.InvalidHexSequence;
}
const hi_nibbles = (@as(IntBlock, @bitCast(nibbles)) & @as(IntBlock, @splat(0x000f_000f))) << @as(IntBlock, @splat(4 + 8));
const lo_nibbles = @as(IntBlock, @bitCast(nibbles)) & @as(IntBlock, @splat(0x0f00_0f00));
const intertwined_bytes: ByteBlock = @bitCast(lo_nibbles | hi_nibbles);
const bytes: [block_size]u8 = @bitCast(@shuffle(u8, intertwined_bytes, @as(ByteBlock, @splat(0)), unroll_bytes));
// todo: memcpy isn't that great here
writer[0 .. block_size / 2].* = bytes[0 .. block_size / 2].*;
//@memcpy(writer[0 .. block_size / 2], bytes[0 .. block_size / 2]);
writer += block_size / 2;
}
const inv: u8 = 255;
// zig fmt: off
const hex_nibble_decode: [256]u8 = .{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inv, inv, inv, inv, inv, inv,
inv, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv,
};
// zig fmt: on
const read_end = input.ptr + input.len;
while (reader != read_end) {
const hi = hex_nibble_decode[reader[0]];
const lo = hex_nibble_decode[reader[1]];
reader += 2;
if ((hi | lo) > 0xf) return error.InvalidHexSequence;
writer[0] = hi << 4 | lo;
writer += 1;
}
}
test "decodeHex" {
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena.deinit();
const ally = arena.allocator();
var x = std.rand.Xoshiro256.init(13781);
var rng = x.random();
const expected: []u8 = try ally.alloc(u8, 6652);
rng.bytes(expected);
const input: []u8 = try ally.alloc(u8, expected.len * 2);
for (expected, 0..) |byte, i| {
_ = try std.fmt.bufPrint(input[i * 2 ..], "{x:0>2}", .{byte});
}
const output: []u8 = try ally.alloc(u8, expected.len);
try decodeHex(input, output);
for (expected, output) |e, o| {
try std.testing.expectEqual(e, o);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment