Skip to content

Instantly share code, notes, and snippets.

@ladeak
Created May 14, 2025 08:04
Show Gist options
  • Save ladeak/5af4b1981524462494a38057487f6d97 to your computer and use it in GitHub Desktop.
Save ladeak/5af4b1981524462494a38057487f6d97 to your computer and use it in GitHub Desktop.
Sum SIMD performance
using System.Numerics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Running;
// Sanity check: run both implementations once on the same data and print their
// results so they can be compared by eye before the benchmarks start.
var benchmarks = new Benchmarks();
benchmarks.Setup();

var (scalarSmall, scalarTotal) = benchmarks.Sum(benchmarks._input);
Console.WriteLine(scalarSmall);
Console.WriteLine(scalarTotal);

var (simdSmall, simdTotal) = benchmarks.SumSimd2(benchmarks._input);
Console.WriteLine(simdSmall);
Console.WriteLine(simdTotal);

// Hand over to BenchmarkDotNet for the actual measurements.
BenchmarkRunner.Run<Benchmarks>();
/// <summary>
/// Benchmarks comparing a scalar, branching byte summation with a
/// <see cref="Vector{T}"/>-based implementation.
/// </summary>
/// <remarks>
/// Both implementations compute two values over the same input: the sum of the bytes
/// that are below <see cref="SmallLimit"/> and the sum of all bytes.
/// </remarks>
[HardwareCounters(
    HardwareCounter.BranchMispredictions,
    HardwareCounter.BranchInstructions)]
public class Benchmarks
{
    /// <summary>Bytes strictly below this value contribute to the "small" sum.</summary>
    private const byte SmallLimit = 128;

    /// <summary>
    /// Number of vector blocks accumulated in the 16-bit intermediate sums before they
    /// are flushed into the 32-bit sums.
    /// </summary>
    /// <remarks>
    /// Each block adds at most 2 * 255 = 510 to a 16-bit lane, so 128 blocks add at most
    /// 65,280, which still fits in a <see cref="ushort"/> (max 65,535).
    /// </remarks>
    private const int MaxBlocksPerFlush = 128;

    /// <summary>Input data shared by all benchmarks; populated by <see cref="Setup"/>.</summary>
    public byte[] _input = [];

    /// <summary>
    /// Fills <see cref="_input"/> with 1,000,000 bytes (<c>x % 256</c> for x in 1..1,000,000)
    /// and shuffles them in place with <see cref="Random.Shared"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _input = Enumerable.Range(1, 1_000_000).Select(x => (byte)(x % 256)).ToArray();
        Random.Shared.Shuffle(_input);
    }

    /// <summary>Benchmark entry point for the scalar implementation.</summary>
    /// <returns>
    /// The computed sums. They are returned (rather than discarded) so the measured work
    /// has an observable result and cannot be treated as dead code.
    /// </returns>
    [Benchmark]
    public (uint, uint) Sum() => Sum(_input);

    /// <summary>Benchmark entry point for the vectorized implementation.</summary>
    /// <returns>
    /// The computed sums. They are returned (rather than discarded) so the measured work
    /// has an observable result and cannot be treated as dead code.
    /// </returns>
    [Benchmark]
    public (uint, uint) SumSimd2() => SumSimd2(_input);

    /// <summary>
    /// Scalar reference implementation: one pass over <paramref name="input"/> with a
    /// conditional add for the "small" sum.
    /// </summary>
    /// <param name="input">The bytes to sum.</param>
    /// <returns>
    /// A tuple whose first item is the sum of all bytes below 128 and whose second item
    /// is the sum of all bytes.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public (uint, uint) Sum(byte[] input)
    {
        ArgumentNullException.ThrowIfNull(input);

        uint sumSmall = 0;
        uint sumTotal = 0;
        foreach (byte value in input)
        {
            // The data-dependent branch is intentional: this class records branch
            // instruction/misprediction counters for this loop.
            if (value < SmallLimit)
            {
                sumSmall += value;
            }

            sumTotal += value;
        }

        return (sumSmall, sumTotal);
    }

    /// <summary>
    /// Vectorized implementation producing the same result as <see cref="Sum(byte[])"/>.
    /// </summary>
    /// <remarks>
    /// The array is consumed from the end towards the start in blocks of
    /// <c>Vector&lt;byte&gt;.Count</c> bytes. Bytes are widened to 16-bit lanes and
    /// accumulated there; every <see cref="MaxBlocksPerFlush"/> blocks the 16-bit lanes are
    /// widened again and added to 32-bit accumulators. The leading bytes that do not fill
    /// a whole block are handled by a scalar loop.
    /// </remarks>
    /// <param name="input">The bytes to sum.</param>
    /// <returns>
    /// A tuple whose first item is the sum of all bytes below 128 and whose second item
    /// is the sum of all bytes.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public (uint, uint) SumSimd2(byte[] input)
    {
        ArgumentNullException.ThrowIfNull(input);

        Vector<uint> vSumSmall = Vector<uint>.Zero;
        Vector<uint> vSumTotal = Vector<uint>.Zero;
        Vector<ushort> vInterSmall = Vector<ushort>.Zero;
        Vector<ushort> vInterTotal = Vector<ushort>.Zero;
        Vector<byte> vLimit = Vector.Create<byte>(SmallLimit);
        int pendingBlocks = 0;

        // Start at the last full block; i always points at the first byte of the block.
        int i = input.Length - Vector<byte>.Count;
        for (; i >= 0; i -= Vector<byte>.Count)
        {
            var vCurrent = Vector.LoadUnsafe(ref input[i]);

            // Branch-free selection: keep bytes below the limit, zero out the rest.
            var vMask = Vector.LessThan(vCurrent, vLimit);
            var vSmall = Vector.ConditionalSelect(vMask, vCurrent, Vector<byte>.Zero);

            Vector.Widen(vSmall, out var lower, out var upper);
            vInterSmall += lower + upper;
            Vector.Widen(vCurrent, out lower, out upper);
            vInterTotal += lower + upper;

            // Flush before the 16-bit lanes can overflow (see MaxBlocksPerFlush).
            if (++pendingBlocks == MaxBlocksPerFlush)
            {
                Vector.Widen(vInterSmall, out var lower2, out var upper2);
                vSumSmall += lower2 + upper2;
                Vector.Widen(vInterTotal, out lower2, out upper2);
                vSumTotal += lower2 + upper2;
                vInterSmall = Vector<ushort>.Zero;
                vInterTotal = Vector<ushort>.Zero;
                pendingBlocks = 0;
            }
        }

        // Flush whatever is still pending in the 16-bit accumulators.
        Vector.Widen(vInterSmall, out var finalLower, out var finalUpper);
        vSumSmall += finalLower + finalUpper;
        Vector.Widen(vInterTotal, out finalLower, out finalUpper);
        vSumTotal += finalLower + finalUpper;

        var sumSmall = Vector.Sum(vSumSmall);
        var sumTotal = Vector.Sum(vSumTotal);

        // After the vector loop i is in [-Count, -1]; adding Count - 1 yields the index
        // of the last byte not covered by a full block (or -1 if there is none).
        for (i += Vector<byte>.Count - 1; i >= 0; i--)
        {
            byte value = input[i];
            if (value < SmallLimit)
            {
                sumSmall += value;
            }

            sumTotal += value;
        }

        return (sumSmall, sumTotal);
    }
}
<!-- Project file for the benchmark program: a console executable that hosts the BenchmarkDotNet run. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Built as an executable because the program starts from top-level statements. -->
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<!-- The C# source uses Console, Enumerable and Random without explicit using directives, so implicit usings must stay enabled. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<!-- NOTE(review): presumably required for the [HardwareCounters] diagnoser used on the Benchmarks class; Windows-only, confirm before running elsewhere. -->
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" />
</ItemGroup>
</Project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment