Skip to content

Instantly share code, notes, and snippets.

@mgravell
Created August 20, 2024 10:21
Show Gist options
  • Save mgravell/6ce564ae5f3ace58c8a8e489e3434827 to your computer and use it in GitHub Desktop.
Save mgravell/6ce564ae5f3ace58c8a8e489e3434827 to your computer and use it in GitHub Desktop.
BenchmarkDotNet v0.14.0, Windows 11 (10.0.26120.1350)
AMD Ryzen 9 7900X, 1 CPU, 24 logical and 12 physical cores
.NET SDK 8.0.203
[Host] : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
DefaultJob : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
| Method | Number | Mean | Error | StdDev | Median |
|------- |------- |----------:|----------:|----------:|----------:|
| Fixed | 8 | 0.6012 ns | 0.0042 ns | 0.0037 ns | 0.6014 ns |
| Span | 8 | 0.5640 ns | 0.0113 ns | 0.0206 ns | 0.5706 ns |
| RefAdd | 8 | 0.5567 ns | 0.0110 ns | 0.0263 ns | 0.5670 ns |
| Fixed | 16 | 1.0129 ns | 0.0087 ns | 0.0082 ns | 1.0096 ns |
| Span | 16 | 0.7861 ns | 0.0068 ns | 0.0063 ns | 0.7857 ns |
| RefAdd | 16 | 0.7861 ns | 0.0042 ns | 0.0039 ns | 0.7868 ns |
| Fixed | 143 | 1.4058 ns | 0.0130 ns | 0.0122 ns | 1.4102 ns |
| Span | 143 | 1.2048 ns | 0.0153 ns | 0.0143 ns | 1.2042 ns |
| RefAdd | 143 | 1.1970 ns | 0.0100 ns | 0.0089 ns | 1.1949 ns |
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
[module: SkipLocalsInit]
// Program entry point (top-level statement): passes the BufferBenchmarks type and the
// process command-line arguments to BenchmarkDotNet's BenchmarkRunner.
BenchmarkRunner.Run(typeof(BufferBenchmarks), args: args);
/// <summary>
/// Benchmarks that call each of the three <c>TestImpl.WriteNumeric*</c> variants
/// with the same destination span, writer and number, so their per-call cost can be compared.
/// </summary>
public class BufferBenchmarks
{
    // Every benchmark repeats its call this many times per invocation, so the
    // one-off stackalloc is not what ends up being measured.
    private const int OperationsPerInvoke = 1024;

    // Length of the stack-allocated destination; enough for the 1-3 digit fast paths.
    private const int ScratchLength = 4;

    /// <summary>
    /// The value to format. The listed values have one, two and three decimal digits,
    /// so only the optimized (fast) paths are exercised.
    /// </summary>
    [Params(8, 16, 143)]
    public ulong Number { get; set; }

    /// <summary>Measures <c>TestImpl.WriteNumericFixed</c> (pinned-pointer writes).</summary>
    [Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
    public void Fixed()
    {
        Span<byte> scratch = stackalloc byte[ScratchLength];
        ulong value = Number;
        for (int iteration = 0; iteration < OperationsPerInvoke; iteration++)
        {
            TestImpl.WriteNumericFixed(scratch, NoOpWriter.Instance, value);
        }
    }

    /// <summary>Measures <c>TestImpl.WriteNumericSpan</c> (span-indexer writes).</summary>
    [Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
    public void Span()
    {
        Span<byte> scratch = stackalloc byte[ScratchLength];
        ulong value = Number;
        for (int iteration = 0; iteration < OperationsPerInvoke; iteration++)
        {
            TestImpl.WriteNumericSpan(scratch, NoOpWriter.Instance, value);
        }
    }

    /// <summary>Measures <c>TestImpl.WriteNumericRefAdd</c> (ref + byte-offset writes).</summary>
    [Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
    public void RefAdd()
    {
        Span<byte> scratch = stackalloc byte[ScratchLength];
        ulong value = Number;
        for (int iteration = 0; iteration < OperationsPerInvoke; iteration++)
        {
            TestImpl.WriteNumericRefAdd(scratch, NoOpWriter.Instance, value);
        }
    }
}
/// <summary>
/// A do-nothing <see cref="IBufferWriter{T}"/> for the benchmarks: <see cref="Advance"/>
/// discards its argument, and asking for a buffer throws <see cref="NotSupportedException"/>.
/// </summary>
sealed class NoOpWriter : IBufferWriter<byte>
{
    /// <summary>The only instance; the constructor is private.</summary>
    internal static readonly NoOpWriter Instance = new NoOpWriter();

    private NoOpWriter()
    {
    }

    /// <summary>Accepts <paramref name="count"/> and ignores it. This is the only API the benchmarks need.</summary>
    public void Advance(int count)
    {
    }

    /// <summary>Not supported; always throws.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public Memory<byte> GetMemory(int sizeHint = 0)
    {
        throw new NotSupportedException();
    }

    /// <summary>Delegates to <see cref="GetMemory"/>, so it always throws as well.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public Span<byte> GetSpan(int sizeHint = 0)
    {
        Memory<byte> memory = GetMemory(sizeHint);
        return memory.Span;
    }
}
/// <summary>
/// Three variants of the same routine: write the decimal ASCII digits of a number below 1000
/// into the start of a caller-supplied span and report the digit count via
/// <see cref="IBufferWriter{T}.Advance"/>. The variants differ only in how the bytes are stored
/// (pinned pointer, span indexer, or ref + byte offset). Anything that does not fit a fast path
/// is handed to <see cref="WriteNumericMultiWrite"/>, which is not implemented here.
/// </summary>
static class TestImpl
{
// 20 is the number of decimal digits in ulong.MaxValue (18446744073709551615).
private const int _maxULongByteLength = 20;
// Per-thread scratch array ([ThreadStatic]); null until first requested on that thread.
// NOTE(review): nothing in the visible WriteNumeric* methods reads this scratch buffer.
[ThreadStatic]
private static byte[]? _numericBytesScratch;
// Returns the current thread's scratch array, creating it on first access.
private static byte[] NumericBytesScratch => _numericBytesScratch ?? CreateNumericBytesScratch();
// Allocates the scratch array, stores it in the thread-static field and returns it.
// Marked NoInlining, presumably to keep the allocation off the getter's inlined path.
[MethodImpl(MethodImplOptions.NoInlining)]
private static byte[] CreateNumericBytesScratch()
{
var bytes = new byte[_maxULongByteLength];
_numericBytesScratch = bytes;
return bytes;
}
/// <summary>
/// Pinned-pointer variant: pins <paramref name="span"/> with <c>fixed</c> and stores the digits
/// through a <c>byte*</c>.
/// </summary>
/// <param name="span">Destination; digits are written starting at index 0.</param>
/// <param name="buffer">Writer whose <c>Advance</c> is called with the number of digits written.</param>
/// <param name="number">Value to format. Fast paths cover values below 1000 when the span is long enough.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe void WriteNumericFixed(Span<byte> span, IBufferWriter<byte> buffer, ulong number)
{
const byte AsciiDigitStart = (byte)'0';
//var span = buffer.Span;
var bytesLeftInBlock = span.Length;
// Fast path, try copying to the available memory directly
var simpleWrite = true;
fixed (byte* output = span)
{
var start = output;
// One digit: value + '0'.
if (number < 10 && bytesLeftInBlock >= 1)
{
*(start) = (byte)(((uint)number) + AsciiDigitStart);
buffer.Advance(1);
}
// Two digits: tens digit, then the remainder.
else if (number < 100 && bytesLeftInBlock >= 2)
{
var val = (uint)number;
// Multiply-and-shift in place of an integer divide: (val * 205) >> 11 == val / 10 for val <= 1028.
var tens = (byte)((val * 205u) >> 11); // div10, valid to 1028
*(start) = (byte)(tens + AsciiDigitStart);
*(start + 1) = (byte)(val - (tens * 10) + AsciiDigitStart);
buffer.Advance(2);
}
// Three digits: hundreds, tens, units.
else if (number < 1000 && bytesLeftInBlock >= 3)
{
var val = (uint)number;
// digit0 is val / 100; digits01 is val / 10 (the leading two digits as one number).
var digit0 = (byte)((val * 41u) >> 12); // div100, valid to 1098
var digits01 = (byte)((val * 205u) >> 11); // div10, valid to 1028
*(start) = (byte)(digit0 + AsciiDigitStart);
*(start + 1) = (byte)(digits01 - (digit0 * 10) + AsciiDigitStart);
*(start + 2) = (byte)(val - (digits01 * 10) + AsciiDigitStart);
buffer.Advance(3);
}
else
{
// Too large for a fast path, or not enough room in the span.
simpleWrite = false;
}
}
// The fallback is invoked here, after the fixed block has ended.
if (!simpleWrite)
{
WriteNumericMultiWrite(buffer, number);
}
}
/// <summary>
/// Span-indexer variant: same branches and arithmetic as <see cref="WriteNumericFixed"/>, but the
/// digits are stored with <c>span[i]</c>.
/// </summary>
/// <param name="span">Destination; digits are written starting at index 0.</param>
/// <param name="buffer">Writer whose <c>Advance</c> is called with the number of digits written.</param>
/// <param name="number">Value to format. Fast paths cover values below 1000 when the span is long enough.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void WriteNumericSpan(Span<byte> span, IBufferWriter<byte> buffer, ulong number)
{
const byte AsciiDigitStart = (byte)'0';
//var span = buffer.Span;
var bytesLeftInBlock = span.Length;
// Fast path, try copying to the available memory directly
var simpleWrite = true;
// One digit: value + '0'.
if (number < 10 && bytesLeftInBlock >= 1)
{
span[0] = (byte)(((uint)number) + AsciiDigitStart);
buffer.Advance(1);
}
// Two digits: tens digit, then the remainder.
else if (number < 100 && bytesLeftInBlock >= 2)
{
var val = (uint)number;
// Multiply-and-shift in place of an integer divide: (val * 205) >> 11 == val / 10 for val <= 1028.
var tens = (byte)((val * 205u) >> 11); // div10, valid to 1028
span[0] = (byte)(tens + AsciiDigitStart);
span[1] = (byte)(val - (tens * 10) + AsciiDigitStart);
buffer.Advance(2);
}
// Three digits: hundreds, tens, units.
else if (number < 1000 && bytesLeftInBlock >= 3)
{
var val = (uint)number;
// digit0 is val / 100; digits01 is val / 10 (the leading two digits as one number).
var digit0 = (byte)((val * 41u) >> 12); // div100, valid to 1098
var digits01 = (byte)((val * 205u) >> 11); // div10, valid to 1028
span[0] = (byte)(digit0 + AsciiDigitStart);
span[1] = (byte)(digits01 - (digit0 * 10) + AsciiDigitStart);
span[2] = (byte)(val - (digits01 * 10) + AsciiDigitStart);
buffer.Advance(3);
}
else
{
// Too large for a fast path, or not enough room in the span.
simpleWrite = false;
}
if (!simpleWrite)
{
WriteNumericMultiWrite(buffer, number);
}
}
/// <summary>
/// Ref-offset variant: same branches and arithmetic as <see cref="WriteNumericFixed"/>, but the
/// digits are stored through a <c>ref byte</c> obtained from <see cref="MemoryMarshal.GetReference{T}(Span{T})"/>
/// and offset with <see cref="Unsafe.AddByteOffset{T}(ref T, nint)"/>.
/// </summary>
/// <param name="span">Destination; digits are written starting at index 0.</param>
/// <param name="buffer">Writer whose <c>Advance</c> is called with the number of digits written.</param>
/// <param name="number">Value to format. Fast paths cover values below 1000 when the span is long enough.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void WriteNumericRefAdd(Span<byte> span, IBufferWriter<byte> buffer, ulong number)
{
const byte AsciiDigitStart = (byte)'0';
// Reference to the span's first element; it is only written through after a length check below.
ref byte start = ref MemoryMarshal.GetReference(span);
//var span = buffer.Span;
var bytesLeftInBlock = span.Length;
// Fast path, try copying to the available memory directly
var simpleWrite = true;
// One digit: value + '0'.
if (number < 10 && bytesLeftInBlock >= 1)
{
start = (byte)(((uint)number) + AsciiDigitStart);
buffer.Advance(1);
}
// Two digits: tens digit, then the remainder.
else if (number < 100 && bytesLeftInBlock >= 2)
{
var val = (uint)number;
// Multiply-and-shift in place of an integer divide: (val * 205) >> 11 == val / 10 for val <= 1028.
var tens = (byte)((val * 205u) >> 11); // div10, valid to 1028
start = (byte)(tens + AsciiDigitStart);
Unsafe.AddByteOffset(ref start, 1) = (byte)(val - (tens * 10) + AsciiDigitStart);
buffer.Advance(2);
}
// Three digits: hundreds, tens, units.
else if (number < 1000 && bytesLeftInBlock >= 3)
{
var val = (uint)number;
// digit0 is val / 100; digits01 is val / 10 (the leading two digits as one number).
var digit0 = (byte)((val * 41u) >> 12); // div100, valid to 1098
var digits01 = (byte)((val * 205u) >> 11); // div10, valid to 1028
start = (byte)(digit0 + AsciiDigitStart);
Unsafe.AddByteOffset(ref start, 1) = (byte)(digits01 - (digit0 * 10) + AsciiDigitStart);
Unsafe.AddByteOffset(ref start, 2) = (byte)(val - (digits01 * 10) + AsciiDigitStart);
buffer.Advance(3);
}
else
{
// Too large for a fast path, or not enough room in the span.
simpleWrite = false;
}
if (!simpleWrite)
{
WriteNumericMultiWrite(buffer, number);
}
}
/// <summary>
/// Fallback for values the fast paths cannot handle. Not implemented in this file: it always throws.
/// Marked NoInlining, so it stays a separate call from the inlined fast paths.
/// </summary>
/// <exception cref="NotImplementedException">Always.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void WriteNumericMultiWrite(IBufferWriter<byte> buffer, ulong number)
{
throw new NotImplementedException();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment