Skip to content

Instantly share code, notes, and snippets.

@EgorBo
Last active December 20, 2021 18:20
Show Gist options
  • Save EgorBo/d7e5982cc9777c1cabdee7857b072468 to your computer and use it in GitHub Desktop.
Save EgorBo/d7e5982cc9777c1cabdee7857b072468 to your computer and use it in GitHub Desktop.
sse-vs-avx-string-equals.cs
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
//BenchmarkDotNet=v0.13.1, OS=Windows 10.0.19042.1415 (20H2/October2020Update)
//Intel Core i7-8700K CPU 3.70GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
//.NET SDK=6.0.100
// [Host] : .NET 6.0.0 (6.0.21.52210), X64 RyuJIT
// DefaultJob : .NET 6.0.0 (6.0.21.52210), X64 RyuJIT
//| Method | headerName | Mean | Error | StdDev | Ratio |
//|----------------- |------------------- |---------:|----------:|----------:|------:|
//| StringEqauls | PROXY-AUTHENTICATE | 9.739 ns | 0.1571 ns | 0.1470 ns | 1.00 |
//| StringEqauls_AVX | PROXY-AUTHENTICATE | 1.282 ns | 0.0040 ns | 0.0038 ns | 0.13 |
//| StringEqauls_SSE | PROXY-AUTHENTICATE | 3.203 ns | 0.0048 ns | 0.0043 ns | 0.33 |
//| | | | | | |
//| StringEqauls | proxy-authenticate | 9.130 ns | 0.0809 ns | 0.0675 ns | 1.00 |
//| StringEqauls_AVX | proxy-authenticate | 1.285 ns | 0.0041 ns | 0.0035 ns | 0.14 |
//| StringEqauls_SSE | proxy-authenticate | 3.182 ns | 0.0059 ns | 0.0052 ns | 0.35 |
/// <summary>
/// Compares three ways of testing whether a header name equals "Proxy-Authenticate"
/// while ignoring ASCII case: the built-in <see cref="string.Equals(string, string, StringComparison)"/>,
/// a hand-written AVX2 (256-bit) comparison, and a hand-written SSE2 (128-bit) comparison.
/// </summary>
public class Benchmarks
{
    // Compile-time constant, so every use below is inlined exactly like the literal it replaces.
    private const string ExpectedHeader = "Proxy-Authenticate";

    /// <summary>Baseline: ordinal, case-insensitive comparison provided by the BCL.</summary>
    /// <param name="headerName">Header name to test; may be null.</param>
    /// <returns>true when <paramref name="headerName"/> matches the expected header.</returns>
    [Benchmark(Baseline = true)]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls(string headerName)
    {
        return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// AVX2 variant: covers the 18-character header with two overlapping 16-character vectors.
    /// </summary>
    /// <param name="headerName">Header name to test; null yields false.</param>
    /// <returns>true when <paramref name="headerName"/> matches the expected header ignoring ASCII case.</returns>
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls_AVX(string headerName)
    {
        // Without AVX2 the intrinsics below would throw PlatformNotSupportedException,
        // so use the portable comparison instead.
        if (!Avx2.IsSupported)
        {
            return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
        }

        return object.ReferenceEquals(headerName, ExpectedHeader) ||
               (headerName != null &&
                // The length check is what makes the fixed-offset vector reads in the helper safe.
                headerName.Length == ExpectedHeader.Length &&
                CompareStringVsTwoVectors256AsciiIgnoreCase(headerName,
                    // Upper-cased "Proxy-Authenticate" split into two overlapping 32-byte vectors:
                    // characters [0..16) "PROXY-AUTHENTICA" and characters [2..18) "OXY-AUTHENTICATE".
                    Vector256.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                    Vector256.Create('O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A', 'T', 'E')));
    }

    /// <summary>
    /// SSE2 variant: covers the 18-character header with three 8-character vectors,
    /// the last of which overlaps the second.
    /// </summary>
    /// <param name="headerName">Header name to test; null yields false.</param>
    /// <returns>true when <paramref name="headerName"/> matches the expected header ignoring ASCII case.</returns>
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls_SSE(string headerName)
    {
        // Without SSE2 the intrinsics below would throw PlatformNotSupportedException,
        // so use the portable comparison instead.
        if (!Sse2.IsSupported)
        {
            return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
        }

        return object.ReferenceEquals(headerName, ExpectedHeader) ||
               (headerName != null &&
                // The length check is what makes the fixed-offset vector reads in the helper safe.
                headerName.Length == ExpectedHeader.Length &&
                CompareStringVsThreeVectors128AsciiIgnoreCase(headerName,
                    // Upper-cased characters [0..8), [8..16) and [10..18) of "Proxy-Authenticate".
                    Vector128.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U'),
                    Vector128.Create('T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                    Vector128.Create('E', 'N', 'T', 'I', 'C', 'A', 'T', 'E')));
    }

    /// <summary>
    /// Compares the first 16 and the last 16 characters of <paramref name="str1"/>, upper-cased
    /// over the ASCII range, against two expected vectors.
    /// </summary>
    /// <param name="str1">
    /// Input string. The caller must guarantee a length between 16 and 32 characters: shorter
    /// strings would be read out of bounds, longer ones would leave the middle unchecked.
    /// </param>
    /// <param name="str2_v1">Expected first 16 characters; must already be UPPER case.</param>
    /// <param name="str2_v2">Expected last 16 characters; must already be UPPER case.</param>
    /// <returns>true when both 16-character windows match.</returns>
    private static bool CompareStringVsTwoVectors256AsciiIgnoreCase(string str1,
        Vector256<ushort> str2_v1,
        Vector256<ushort> str2_v2)
    {
        // Debug-only precondition checks; removed from Release builds.
        System.Diagnostics.Debug.Assert(str1.Length >= 16, "Input is too short for a 256-bit read.");
        System.Diagnostics.Debug.Assert(str1.Length <= 32, "Two 16-char windows cannot cover the input.");

        ReadOnlySpan<char> span = str1.AsSpan();
        ref char spanStart = ref MemoryMarshal.GetReference(span);

        // Window 1: characters [0..16). Window 2: the trailing 16 characters (may overlap window 1).
        var v1 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector256<ushort>>(
            ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, str1.Length - 16)));

        // MoveMask gathers the top bit of each of the 32 bytes; all 32 bits set reads as -1.
        return Avx2.MoveMask(Avx2.CompareEqual(ToUpper256(v1), str2_v1).AsByte()) == -1 &&
               Avx2.MoveMask(Avx2.CompareEqual(ToUpper256(v2), str2_v2).AsByte()) == -1;
    }

    /// <summary>
    /// Compares characters [0..8), [8..16) and the last 8 characters of <paramref name="str1"/>,
    /// upper-cased over the ASCII range, against three expected vectors.
    /// </summary>
    /// <param name="str1">
    /// Input string. The caller must guarantee a length between 16 and 24 characters: shorter
    /// strings would be read out of bounds, longer ones would leave a gap unchecked.
    /// </param>
    /// <param name="str2_v1">Expected characters [0..8); must already be UPPER case.</param>
    /// <param name="str2_v2">Expected characters [8..16); must already be UPPER case.</param>
    /// <param name="str2_v3">Expected last 8 characters; must already be UPPER case.</param>
    /// <returns>true when all three 8-character windows match.</returns>
    private static bool CompareStringVsThreeVectors128AsciiIgnoreCase(string str1,
        Vector128<ushort> str2_v1,
        Vector128<ushort> str2_v2,
        Vector128<ushort> str2_v3)
    {
        // Debug-only precondition checks; removed from Release builds.
        System.Diagnostics.Debug.Assert(str1.Length >= 16, "Input is too short for the fixed-offset reads.");
        System.Diagnostics.Debug.Assert(str1.Length <= 24, "Three 8-char windows cannot cover the input.");

        ReadOnlySpan<char> span = str1.AsSpan();
        ref char spanStart = ref MemoryMarshal.GetReference(span);

        var v1 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, 8)));
        // Trailing window; overlaps v2 whenever the input is shorter than 24 characters.
        var v3 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, str1.Length - 8)));

        // MoveMask gathers the top bit of each of the 16 bytes; a full match is 0xFFFF.
        return Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(v1), str2_v1).AsByte()) == 0xFFFF &&
               Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(v2), str2_v2).AsByte()) == 0xFFFF &&
               Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(v3), str2_v3).AsByte()) == 0xFFFF;
    }

    /// <summary>
    /// Clears bit 0x20 of every BYTE whose value lies in 'a'..'z', leaving all other bytes untouched.
    /// </summary>
    /// <remarks>
    /// The operation is byte-wise, so the high byte of a UTF-16 code unit is processed as well.
    /// That is harmless for equality tests against ASCII text: a high byte in 'a'..'z' maps to
    /// 'A'..'Z', which is still non-zero and therefore can never equal an ASCII character.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static Vector256<ushort> ToUpper256(Vector256<ushort> vec)
    {
        // Shift so that 'a'..'z' lands on the lowest signed values, -128..-103.
        var rangeshift = Avx2.Subtract(vec.AsByte(), Vector256.Create((byte)('a' + 128)));
        // Anything above -103 after the shift is outside 'a'..'z' and must stay as it is.
        var nomodify = Avx2.CompareGreaterThan(rangeshift.AsSByte(), Vector256.Create((sbyte)(-128 + 25)));
        // 0x20 for bytes to convert, 0 for the rest.
        var flip = Avx2.AndNot(nomodify.AsByte(), Vector256.Create((byte)0x20));
        return Avx2.Xor(vec, flip.AsUInt16());
    }

    /// <summary>
    /// Clears bit 0x20 of every BYTE whose value lies in 'a'..'z', leaving all other bytes untouched.
    /// </summary>
    /// <remarks>
    /// The operation is byte-wise, so the high byte of a UTF-16 code unit is processed as well.
    /// That is harmless for equality tests against ASCII text: a high byte in 'a'..'z' maps to
    /// 'A'..'Z', which is still non-zero and therefore can never equal an ASCII character.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static Vector128<ushort> ToUpper128(Vector128<ushort> vec)
    {
        // Shift so that 'a'..'z' lands on the lowest signed values, -128..-103.
        var rangeshift = Sse2.Subtract(vec.AsByte(), Vector128.Create((byte)('a' + 128)));
        // Anything above -103 after the shift is outside 'a'..'z' and must stay as it is.
        var nomodify = Sse2.CompareGreaterThan(rangeshift.AsSByte(), Vector128.Create((sbyte)(-128 + 25)));
        // 0x20 for bytes to convert, 0 for the rest.
        var flip = Sse2.AndNot(nomodify.AsByte(), Vector128.Create((byte)0x20));
        return Sse2.Xor(vec, flip.AsUInt16());
    }

    /// <summary>Entry point: runs every benchmark declared in this class.</summary>
    public static void Main(string[] args)
    {
        BenchmarkDotNet.Running.BenchmarkRunner.Run<Benchmarks>();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment