using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// ---------------------------------------------------------------------------
// Self-check, executed before any benchmark runs.
//
// For 1024 rounds of pseudo-random inputs (fixed seed 71), the result of
// Impl.ShuffleUnsafe is compared with the portable Vector128/Vector256.Shuffle
// applied to the same values. Before the portable call the indices are masked
// (0x8F for 128-bit, 0x9F for 256-bit), i.e. ShuffleUnsafe is only expected to
// agree with the portable shuffle once the index bits outside those masks are
// cleared. Mismatches are printed; they do not stop the benchmark run.
// ---------------------------------------------------------------------------
Random rng = new(71);

for (int round = 0; round < 1024; round++)
{
    Vector128<byte> values128 = default;
    Vector128<byte> indices128 = default;
    Vector256<byte> values256 = default;
    Vector256<byte> indices256 = default;

    // The draw order (values128, indices128, values256, indices256) determines
    // which bytes of the seeded sequence each vector receives.
    FillRandom(rng, ref values128);
    FillRandom(rng, ref indices128);
    FillRandom(rng, ref values256);
    FillRandom(rng, ref indices256);

    // The implementation under test sees the raw, unmasked indices.
    Vector128<byte> actual128 = Impl.ShuffleUnsafe(values128, indices128);
    Vector256<byte> actual256 = Impl.ShuffleUnsafe(values256, indices256);

    // The reference implementation sees the masked indices.
    indices128 &= Vector128.Create((byte)0x8F);
    indices256 &= Vector256.Create((byte)0x9F);

    Vector128<byte> expected128 = Vector128.Shuffle(values128, indices128);
    Vector256<byte> expected256 = Vector256.Shuffle(values256, indices256);

    if (actual128 != expected128)
    {
        Console.WriteLine($"{round}: Error 128: {values128} | {indices128} => {actual128}, expected: {expected128}");
    }

    if (actual256 != expected256)
    {
        Console.WriteLine($"{round}: Error 256: {values256} | {indices256} => {actual256}, expected: {expected256}");
    }
}

BenchmarkRunner.Run<Bench>();

// Overwrites every byte of 'target' with bytes drawn from 'source'.
static void FillRandom<T>(Random source, ref T target) where T : struct
{
    source.NextBytes(MemoryMarshal.AsBytes(new Span<T>(ref target)));
}

/// <summary>
/// BenchmarkDotNet benchmarks comparing the portable <c>Vector128.Shuffle</c> /
/// <c>Vector256.Shuffle</c> with <see cref="Impl"/>'s <c>ShuffleUnsafe</c> overloads
/// on the same pseudo-random inputs.
/// </summary>
public class Bench
{
    public Vector128<byte> A128;
    public Vector128<byte> B128;
    public Vector256<byte> A256;
    public Vector256<byte> B256;

    /// <summary>
    /// Fills the four input vectors from a <see cref="Random"/> seeded with 71,
    /// in the order A128, B128, A256, B256.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        Random rng = new(71);

        Randomize(rng, ref A128);
        Randomize(rng, ref B128);
        Randomize(rng, ref A256);
        Randomize(rng, ref B256);
    }

    /// <summary>Portable 128-bit shuffle of <see cref="A128"/> by <see cref="B128"/>.</summary>
    [Benchmark]
    public Vector128<byte> Shuffle128() => Vector128.Shuffle(A128, B128);

    /// <summary><see cref="Impl"/> 128-bit shuffle of <see cref="A128"/> by <see cref="B128"/>.</summary>
    [Benchmark]
    public Vector128<byte> ShuffleUnsafe128() => Impl.ShuffleUnsafe(A128, B128);

    /// <summary>Portable 256-bit shuffle of <see cref="A256"/> by <see cref="B256"/>.</summary>
    [Benchmark]
    public Vector256<byte> Shuffle256() => Vector256.Shuffle(A256, B256);

    /// <summary><see cref="Impl"/> 256-bit shuffle of <see cref="A256"/> by <see cref="B256"/>.</summary>
    [Benchmark]
    public Vector256<byte> ShuffleUnsafe256() => Impl.ShuffleUnsafe(A256, B256);

    // Overwrites every byte of 'target' with bytes drawn from 'rng'.
    private static void Randomize<T>(Random rng, ref T target) where T : struct
        => rng.NextBytes(MemoryMarshal.AsBytes(new Span<T>(ref target)));
}

/// <summary>
/// Byte-shuffle helpers that use x86 intrinsics when the hardware supports them
/// and fall back to the portable <c>Vector128.Shuffle</c> / <c>Vector256.Shuffle</c> otherwise.
/// </summary>
public static class Impl
{
    /// <summary>
    /// Shuffles the bytes of <paramref name="values"/> according to <paramref name="indices"/>.
    /// </summary>
    /// <param name="values">Source bytes.</param>
    /// <param name="indices">Per-byte selectors.</param>
    /// <returns>
    /// The result of <c>Ssse3.Shuffle</c> when SSSE3 is supported, otherwise the result of
    /// <c>Vector128.Shuffle</c>.
    /// </returns>
    /// <remarks>
    /// The two paths are only checked against each other (by the self-check at the top of
    /// this file) for indices masked with <c>0x8F</c>; callers should not rely on identical
    /// results for other index bits.
    /// </remarks>
    public static Vector128<byte> ShuffleUnsafe(Vector128<byte> values, Vector128<byte> indices)
        => Ssse3.IsSupported
            ? Ssse3.Shuffle(values, indices)
            : Vector128.Shuffle(values, indices);

    /// <summary>
    /// Shuffles the bytes of <paramref name="values"/> according to <paramref name="indices"/>,
    /// allowing a selector in one 128-bit half to pick a byte from the other half.
    /// </summary>
    /// <param name="values">Source bytes.</param>
    /// <param name="indices">Per-byte selectors.</param>
    /// <returns>
    /// With AVX2: a per-byte blend of two <c>Avx2.Shuffle</c> results (see remarks).
    /// Without AVX2: the result of <c>Vector256.Shuffle</c>.
    /// </returns>
    /// <remarks>
    /// The AVX2 path shuffles both <paramref name="values"/> and a copy with its 128-bit halves
    /// exchanged, then picks per byte between the two. The self-check at the top of this file
    /// compares this against <c>Vector256.Shuffle</c> for indices masked with <c>0x9F</c>.
    /// </remarks>
    public static Vector256<byte> ShuffleUnsafe(Vector256<byte> values, Vector256<byte> indices)
    {
        if (!Avx2.IsSupported)
        {
            return Vector256.Shuffle(values, indices);
        }

        // Constants: 0x10 in every byte of the upper half only, the 0x9F bit mask,
        // and the signed threshold 0x0F used for the selection compare.
        Vector256<byte> upperHalfBit = Vector256.Create(Vector128.Create((byte)0), Vector128.Create((byte)0x10));
        Vector256<byte> keptBits = Vector256.Create((byte)0x9F);
        Vector256<sbyte> sameHalfLimit = Vector256.Create((sbyte)0x0F);

        // Flip bit 4 of the selectors in the upper half, then keep only bits 0x9F.
        // A signed value above 0x0F afterwards marks a byte that must come from the
        // half-swapped shuffle; a selector with bit 7 set compares as negative and
        // therefore keeps the unswapped shuffle's byte.
        Vector256<byte> flipped = Avx2.Xor(indices, upperHalfBit);
        Vector256<byte> halfKey = Avx2.And(flipped, keptBits);
        Vector256<byte> takeSwapped = Avx2.CompareGreaterThan(halfKey.AsSByte(), sameHalfLimit).AsByte();

        // Control 0b00000001 exchanges the two 128-bit halves of 'values'.
        Vector256<byte> halvesSwapped = Avx2.Permute2x128(values, values, 0b00000001);

        Vector256<byte> fromSameHalf = Avx2.Shuffle(values, indices);
        Vector256<byte> fromOtherHalf = Avx2.Shuffle(halvesSwapped, indices);

        return Avx2.BlendVariable(fromSameHalf, fromOtherHalf, takeSwapped);
    }
}