Last active
June 3, 2024 02:17
-
-
Save lassade/f4f1092d507b494b21f3e58d31234ab5 to your computer and use it in GitHub Desktop.
zig decodeHex SIMD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compile with: -mcpu=x86_64+sse2 to ensure sse2 support | |
pub fn decodeHex(input: []const u8, output: []u8) !void { | |
const block_size = 16; // change block size to use avx2 | |
const ByteBlock = @Vector(block_size, u8); | |
const IntBlock = @Vector(block_size / 4, u32); | |
if (input.len & 1 != 0) { | |
return error.OddLenghtInput; | |
} | |
// the block size padding is nescessary to avoid buffer overflow | |
if (output.len > input.len / 2) { | |
return error.OutputBufferIsTooShort; | |
} | |
var reader = input.ptr; | |
var writer = output.ptr; | |
// note: looking at the assembly this pattern is the only | |
// one that acctually runs this entire block at comptime | |
const unroll_bytes: [block_size]u8 = comptime blk: { | |
var temp: [block_size]u8 = undefined; | |
for (0..block_size / 2) |i| { | |
temp[i + block_size / 2] = @intCast(i *% 2); | |
temp[i] = @intCast(i *% 2 + 1); | |
} | |
break :blk temp; | |
}; | |
const block_read_end = input.ptr + ((input.len / block_size) *% block_size); | |
while (reader != block_read_end) { | |
const block: ByteBlock = @bitCast(reader[0..block_size].*); | |
reader += block_size; | |
const number = block -% @as(ByteBlock, @splat(0x30)); | |
const alpha_uppercase = block -% @as(ByteBlock, @splat(0x41 - 10)); | |
const alpha_lowercase = block -% @as(ByteBlock, @splat(0x61 - 10)); | |
const temp0 = @select(u8, number < @as(ByteBlock, @splat(10)), number, alpha_uppercase); | |
const nibbles = @select(u8, temp0 < @as(ByteBlock, @splat(16)), temp0, alpha_lowercase); | |
if (@reduce(.Or, nibbles >= @as(ByteBlock, @splat(16)))) { | |
return error.InvalidHexSequence; | |
} | |
const hi_nibbles = (@as(IntBlock, @bitCast(nibbles)) & @as(IntBlock, @splat(0x000f_000f))) << @as(IntBlock, @splat(4 + 8)); | |
const lo_nibbles = @as(IntBlock, @bitCast(nibbles)) & @as(IntBlock, @splat(0x0f00_0f00)); | |
const intertwined_bytes: ByteBlock = @bitCast(lo_nibbles | hi_nibbles); | |
const bytes: [block_size]u8 = @bitCast(@shuffle(u8, intertwined_bytes, @as(ByteBlock, @splat(0)), unroll_bytes)); | |
// todo: memcpy isn't that great here | |
writer[0 .. block_size / 2].* = bytes[0 .. block_size / 2].*; | |
//@memcpy(writer[0 .. block_size / 2], bytes[0 .. block_size / 2]); | |
writer += block_size / 2; | |
} | |
const inv: u8 = 255; | |
// zig fmt: off | |
const hex_nibble_decode: [256]u8 = .{ | |
// 0 1 2 3 4 5 6 7 8 9 a b c d e f | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inv, inv, inv, inv, inv, inv, | |
inv, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, inv, | |
}; | |
// zig fmt: on | |
const read_end = input.ptr + input.len; | |
while (reader != read_end) { | |
const hi = hex_nibble_decode[reader[0]]; | |
const lo = hex_nibble_decode[reader[1]]; | |
reader += 2; | |
if ((hi | lo) > 0xf) return error.InvalidHexSequence; | |
writer[0] = hi << 4 | lo; | |
writer += 1; | |
} | |
} | |
test "decodeHex" { | |
var arena = std.heap.ArenaAllocator.init(std.testing.allocator); | |
defer arena.deinit(); | |
const ally = arena.allocator(); | |
var x = std.rand.Xoshiro256.init(13781); | |
var rng = x.random(); | |
const expected: []u8 = try ally.alloc(u8, 6652); | |
rng.bytes(expected); | |
const input: []u8 = try ally.alloc(u8, expected.len * 2); | |
for (expected, 0..) |byte, i| { | |
_ = try std.fmt.bufPrint(input[i * 2 ..], "{x:0>2}", .{byte}); | |
} | |
const output: []u8 = try ally.alloc(u8, expected.len); | |
try decodeHex(input, output); | |
for (expected, output) |e, o| { | |
try std.testing.expectEqual(e, o); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment