Created
May 14, 2025 08:04
-
-
Save ladeak/5af4b1981524462494a38057487f6d97 to your computer and use it in GitHub Desktop.
Sum SIMD performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Numerics; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Diagnosers; | |
using BenchmarkDotNet.Running; | |
// Sanity check before benchmarking: run both implementations once on the same
// generated input and print their results so they can be compared by eye.
var bench = new Benchmarks();
bench.Setup();

// Scalar implementation: first the sum of bytes below 128, then the sum of all bytes.
var (scalarSmall, scalarTotal) = bench.Sum(bench._input);
Console.WriteLine(scalarSmall);
Console.WriteLine(scalarTotal);

// Vectorized implementation: same two values, in the same order.
var (simdSmall, simdTotal) = bench.SumSimd2(bench._input);
Console.WriteLine(simdSmall);
Console.WriteLine(simdTotal);

// Hand over to BenchmarkDotNet for the actual measurements.
BenchmarkRunner.Run<Benchmarks>();
/// <summary>
/// Compares a scalar loop with a <see cref="Vector{T}"/>-based implementation that
/// computes two sums over a byte array: the sum of the elements below 128 and the
/// sum of all elements.
/// </summary>
[HardwareCounters(
    HardwareCounter.BranchMispredictions,
    HardwareCounter.BranchInstructions)]
public class Benchmarks
{
    /// <summary>Elements strictly below this value contribute to the "small" sum.</summary>
    private const byte SmallLimit = 128;

    /// <summary>
    /// Number of vector blocks that can be accumulated in 16-bit lanes before they must
    /// be flushed into the 32-bit accumulators. Each block adds at most
    /// 2 * <see cref="byte.MaxValue"/> (510) to a lane, so 128 blocks add at most 65280,
    /// which still fits in a <see cref="ushort"/>.
    /// </summary>
    private const int BlocksPerFlush = ushort.MaxValue / (2 * byte.MaxValue);

    /// <summary>Input shared by all benchmarks; populated by <see cref="Setup"/>.</summary>
    public byte[] _input = [];

    /// <summary>
    /// Fills <see cref="_input"/> with 1,000,000 bytes (x % 256 for x in 1..1,000,000)
    /// and shuffles them with <see cref="Random.Shared"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _input = Enumerable.Range(1, 1_000_000).Select(x => (byte)(x % 256)).ToArray();
        Random.Shared.Shuffle(_input);
    }

    /// <summary>Benchmark entry point for the scalar implementation.</summary>
    /// <returns>
    /// The computed sums. Returning them (instead of discarding the value) keeps the
    /// measured work observable so it cannot be eliminated as dead code.
    /// </returns>
    [Benchmark]
    public (uint, uint) Sum() => Sum(_input);

    /// <summary>Benchmark entry point for the vectorized implementation.</summary>
    /// <returns>
    /// The computed sums. Returning them (instead of discarding the value) keeps the
    /// measured work observable so it cannot be eliminated as dead code.
    /// </returns>
    [Benchmark]
    public (uint, uint) SumSimd2() => SumSimd2(_input);

    /// <summary>Scalar reference implementation.</summary>
    /// <param name="input">Bytes to sum.</param>
    /// <returns>
    /// Item1: sum of the elements below 128. Item2: sum of all elements.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public (uint, uint) Sum(byte[] input)
    {
        ArgumentNullException.ThrowIfNull(input);

        uint sumSmall = 0;
        uint sumTotal = 0;
        for (var i = 0; i < input.Length; i++)
        {
            byte value = input[i];
            if (value < SmallLimit)
            {
                sumSmall += value;
            }

            sumTotal += value;
        }

        return (sumSmall, sumTotal);
    }

    /// <summary>
    /// Vectorized implementation. Walks the array from the end in blocks of
    /// <c>Vector&lt;byte&gt;.Count</c> elements, then sums the leftover elements at the
    /// start of the array with a scalar loop.
    /// </summary>
    /// <param name="input">Bytes to sum.</param>
    /// <returns>
    /// Item1: sum of the elements below 128. Item2: sum of all elements.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public (uint, uint) SumSimd2(byte[] input)
    {
        ArgumentNullException.ThrowIfNull(input);

        Vector<uint> vSumSmall = Vector<uint>.Zero;
        Vector<uint> vSumTotal = Vector<uint>.Zero;

        // 16-bit intermediate accumulators; flushed every BlocksPerFlush blocks.
        Vector<ushort> vInterSmall = Vector<ushort>.Zero;
        Vector<ushort> vInterTotal = Vector<ushort>.Zero;
        int pendingBlocks = 0;

        Vector<byte> vLimit = Vector.Create<byte>(SmallLimit);

        int i = input.Length - Vector<byte>.Count;
        for (; i >= 0; i -= Vector<byte>.Count)
        {
            // i + Vector<byte>.Count <= input.Length holds here, so the full-width load
            // stays inside the array.
            Vector<byte> vCurrent = Vector.LoadUnsafe(ref input[i]);

            // Keep the lanes below the limit, zero the others (branch-free filter).
            Vector<byte> vMask = Vector.LessThan(vCurrent, vLimit);
            Vector<byte> vSmall = Vector.ConditionalSelect(vMask, vCurrent, Vector<byte>.Zero);

            Vector.Widen(vSmall, out Vector<ushort> lower, out Vector<ushort> upper);
            vInterSmall += lower + upper;
            Vector.Widen(vCurrent, out lower, out upper);
            vInterTotal += lower + upper;

            if (++pendingBlocks == BlocksPerFlush)
            {
                // Move the 16-bit partial sums into the 32-bit accumulators before
                // another block could overflow a 16-bit lane.
                vSumSmall = AddWidened(vSumSmall, vInterSmall);
                vSumTotal = AddWidened(vSumTotal, vInterTotal);
                vInterSmall = Vector<ushort>.Zero;
                vInterTotal = Vector<ushort>.Zero;
                pendingBlocks = 0;
            }
        }

        // Flush whatever is still pending in the 16-bit accumulators.
        vSumSmall = AddWidened(vSumSmall, vInterSmall);
        vSumTotal = AddWidened(vSumTotal, vInterTotal);

        uint sumSmall = Vector.Sum(vSumSmall);
        uint sumTotal = Vector.Sum(vSumTotal);

        // The vector loop exits with i in [-Count, -1]; adding Count - 1 yields the index
        // of the last element not covered by a full block (or -1 when none is left).
        for (i += Vector<byte>.Count - 1; i >= 0; i--)
        {
            byte value = input[i];
            if (value < SmallLimit)
            {
                sumSmall += value;
            }

            sumTotal += value;
        }

        return (sumSmall, sumTotal);
    }

    /// <summary>
    /// Widens <paramref name="partial"/> to 32-bit lanes and adds both halves to
    /// <paramref name="accumulator"/>.
    /// </summary>
    private static Vector<uint> AddWidened(Vector<uint> accumulator, Vector<ushort> partial)
    {
        Vector.Widen(partial, out Vector<uint> lower, out Vector<uint> upper);
        return accumulator + lower + upper;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Console executable targeting net10.0 that hosts the benchmark program. -->
<Project Sdk="Microsoft.NET.Sdk"> | |
<PropertyGroup> | |
<OutputType>Exe</OutputType> | |
<TargetFramework>net10.0</TargetFramework> | |
<!-- Implicit usings are enabled; the C# source uses Console, Enumerable and Random without explicit using directives for their namespaces. -->
<ImplicitUsings>enable</ImplicitUsings> | |
<Nullable>enable</Nullable> | |
</PropertyGroup> | |
<ItemGroup> | |
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" /> | |
<!-- NOTE(review): presumably required for the HardwareCounters attribute used by the benchmark class; confirm against the BenchmarkDotNet docs. -->
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" /> | |
</ItemGroup> | |
</Project> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment