Last active
December 20, 2021 18:20
-
-
Save EgorBo/d7e5982cc9777c1cabdee7857b072468 to your computer and use it in GitHub Desktop.
sse-vs-avx-string-equals.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
//BenchmarkDotNet=v0.13.1, OS=Windows 10.0.19042.1415 (20H2/October2020Update) | |
//Intel Core i7-8700K CPU 3.70GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores | |
//.NET SDK=6.0.100 | |
// [Host] : .NET 6.0.0 (6.0.21.52210), X64 RyuJIT | |
// DefaultJob : .NET 6.0.0 (6.0.21.52210), X64 RyuJIT | |
//| Method | headerName | Mean | Error | StdDev | Ratio | | |
//|----------------- |------------------- |---------:|----------:|----------:|------:| | |
//| StringEqauls | PROXY-AUTHENTICATE | 9.739 ns | 0.1571 ns | 0.1470 ns | 1.00 | | |
//| StringEqauls_AVX | PROXY-AUTHENTICATE | 1.282 ns | 0.0040 ns | 0.0038 ns | 0.13 | | |
//| StringEqauls_SSE | PROXY-AUTHENTICATE | 3.203 ns | 0.0048 ns | 0.0043 ns | 0.33 | | |
//| | | | | | | | |
//| StringEqauls | proxy-authenticate | 9.130 ns | 0.0809 ns | 0.0675 ns | 1.00 | | |
//| StringEqauls_AVX | proxy-authenticate | 1.285 ns | 0.0041 ns | 0.0035 ns | 0.14 | | |
//| StringEqauls_SSE | proxy-authenticate | 3.182 ns | 0.0059 ns | 0.0052 ns | 0.35 | | |
/// <summary>
/// Benchmarks three ways of testing whether a header name equals
/// "Proxy-Authenticate" ignoring ASCII case: the built-in
/// <c>string.Equals(..., StringComparison.OrdinalIgnoreCase)</c>, an AVX2 version
/// that uses two overlapping 256-bit loads, and an SSE2 version that uses three
/// 128-bit loads.
/// </summary>
/// <remarks>
/// The misspelled method names ("Eqauls") are kept on purpose: they are the public
/// benchmark names and changing them would change the reported results.
/// </remarks>
public class Benchmarks
{
    /// <summary>The header name every benchmark compares its argument against.</summary>
    private const string ExpectedHeader = "Proxy-Authenticate";

    /// <summary>Baseline: the framework's ordinal, case-insensitive string comparison.</summary>
    /// <param name="headerName">Candidate header name; may be <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="headerName"/> matches ignoring case.</returns>
    [Benchmark(Baseline = true)]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls(string headerName)
    {
        return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// AVX2 variant: upper-cases the input in two overlapping 16-char vectors and
    /// compares them against pre-upper-cased constants.
    /// </summary>
    /// <param name="headerName">Candidate header name; <c>null</c> yields <c>false</c>.</param>
    /// <returns><c>true</c> when <paramref name="headerName"/> matches ignoring ASCII case.</returns>
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls_AVX(string headerName)
    {
        // Without AVX2 the intrinsics below would throw PlatformNotSupportedException,
        // so use the baseline comparison instead. Note that the benchmark then
        // measures the fallback, not the vectorized path.
        if (!Avx2.IsSupported)
        {
            return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
        }

        if (object.ReferenceEquals(headerName, ExpectedHeader))
        {
            return true;
        }

        // The length check is what makes the unchecked vector loads in the helper safe.
        return headerName is not null &&
               headerName.Length == ExpectedHeader.Length &&
               CompareStringVsTwoVectors256AsciiIgnoreCase(
                   headerName,
                   // Split "Proxy-Authenticate" (18 chars) into two overlapping 32-byte vectors,
                   // both upper-cased: "Proxy-Authentica" and "oxy-Authenticate".
                   Vector256.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                   Vector256.Create('O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A', 'T', 'E'));
    }

    /// <summary>
    /// SSE2 variant: upper-cases the input in three 8-char vectors (the last one
    /// overlapping the second) and compares them against pre-upper-cased constants.
    /// </summary>
    /// <param name="headerName">Candidate header name; <c>null</c> yields <c>false</c>.</param>
    /// <returns><c>true</c> when <paramref name="headerName"/> matches ignoring ASCII case.</returns>
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    public bool StringEqauls_SSE(string headerName)
    {
        // Same reasoning as the AVX2 variant: avoid PlatformNotSupportedException
        // on hardware without SSE2 by using the baseline comparison.
        if (!Sse2.IsSupported)
        {
            return string.Equals(headerName, ExpectedHeader, StringComparison.OrdinalIgnoreCase);
        }

        if (object.ReferenceEquals(headerName, ExpectedHeader))
        {
            return true;
        }

        // The length check is what makes the unchecked vector loads in the helper safe.
        return headerName is not null &&
               headerName.Length == ExpectedHeader.Length &&
               CompareStringVsThreeVectors128AsciiIgnoreCase(
                   headerName,
                   // Upper-cased chars 0..7, 8..15 and the last 8 chars (10..17).
                   Vector128.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U'),
                   Vector128.Create('T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                   Vector128.Create('E', 'N', 'T', 'I', 'C', 'A', 'T', 'E'));
    }

    /// <summary>
    /// Compares <paramref name="str1"/> against an expected string supplied as two
    /// 16-char vectors: one for its first 16 chars and one for its last 16 chars.
    /// </summary>
    /// <param name="str1">
    /// Input string. The caller must guarantee it has at least 16 chars; the reads
    /// below are not bounds-checked.
    /// </param>
    /// <param name="str2_v1">Expected first 16 chars; must be UPPER case.</param>
    /// <param name="str2_v2">Expected last 16 chars; must be UPPER case.</param>
    /// <returns><c>true</c> when both upper-cased loads equal the expected vectors.</returns>
    private static bool CompareStringVsTwoVectors256AsciiIgnoreCase(string str1,
        // str2 must be upper case
        Vector256<ushort> str2_v1,
        Vector256<ushort> str2_v2)
    {
        System.Diagnostics.Debug.Assert(
            str1.Length >= Vector256<ushort>.Count,
            "Caller must verify the length before the unchecked 32-byte loads.");

        ref char firstChar = ref MemoryMarshal.GetReference(str1.AsSpan());
        ref char tailStart = ref Unsafe.Add(ref firstChar, str1.Length - 16);

        // Head covers chars [0, 16); tail covers the last 16 chars and may overlap the head.
        Vector256<ushort> head = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref firstChar));
        Vector256<ushort> tail = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref tailStart));

        // MoveMask gathers the top bit of all 32 bytes; -1 means every byte compared equal.
        return Avx2.MoveMask(Avx2.CompareEqual(ToUpper256(head), str2_v1).AsByte()) == -1 &&
               Avx2.MoveMask(Avx2.CompareEqual(ToUpper256(tail), str2_v2).AsByte()) == -1;
    }

    /// <summary>
    /// Compares <paramref name="str1"/> against an expected string supplied as three
    /// 8-char vectors: chars 0..7, chars 8..15 and the last 8 chars.
    /// </summary>
    /// <param name="str1">
    /// Input string. The caller must guarantee it has at least 16 chars; the reads
    /// below are not bounds-checked.
    /// </param>
    /// <param name="str2_v1">Expected chars 0..7; must be UPPER case.</param>
    /// <param name="str2_v2">Expected chars 8..15; must be UPPER case.</param>
    /// <param name="str2_v3">Expected last 8 chars; must be UPPER case.</param>
    /// <returns><c>true</c> when all three upper-cased loads equal the expected vectors.</returns>
    private static bool CompareStringVsThreeVectors128AsciiIgnoreCase(string str1,
        // str2 must be upper case
        Vector128<ushort> str2_v1,
        Vector128<ushort> str2_v2,
        Vector128<ushort> str2_v3)
    {
        System.Diagnostics.Debug.Assert(
            str1.Length >= 2 * Vector128<ushort>.Count,
            "Caller must verify the length before the unchecked 16-byte loads.");

        ref char firstChar = ref MemoryMarshal.GetReference(str1.AsSpan());
        ref char middleStart = ref Unsafe.Add(ref firstChar, 8);
        ref char tailStart = ref Unsafe.Add(ref firstChar, str1.Length - 8);

        Vector128<ushort> head = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref firstChar));
        Vector128<ushort> middle = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref middleStart));
        Vector128<ushort> tail = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref tailStart));

        // MoveMask gathers the top bit of all 16 bytes; 0xFFFF means every byte compared equal.
        return Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(head), str2_v1).AsByte()) == 0xFFFF &&
               Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(middle), str2_v2).AsByte()) == 0xFFFF &&
               Sse2.MoveMask(Sse2.CompareEqual(ToUpper128(tail), str2_v3).AsByte()) == 0xFFFF;
    }

    /// <summary>
    /// Flips bit 0x20 of every byte whose value is in 'a'..'z', leaving all other
    /// bytes unchanged. For ASCII chars this is upper-casing.
    /// </summary>
    /// <remarks>
    /// The operation works on individual bytes, so the upper byte of a non-ASCII
    /// UTF-16 char is transformed the same way; the result is only meaningful as
    /// upper-casing for ASCII input.
    /// </remarks>
    /// <param name="vec">Sixteen UTF-16 code units.</param>
    /// <returns><paramref name="vec"/> with ASCII lower-case letters upper-cased.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector256<ushort> ToUpper256(Vector256<ushort> vec)
    {
        // Subtracting ('a' + 128) with wrap-around maps 'a'..'z' to the signed range -128..-103.
        Vector256<byte> shifted = Avx2.Subtract(vec.AsByte(), Vector256.Create((byte)('a' + 128)));
        // All-ones for every byte that is NOT a lower-case letter (signed value > -103).
        Vector256<sbyte> notLower = Avx2.CompareGreaterThan(shifted.AsSByte(), Vector256.Create((sbyte)(-128 + 25)));
        // AndNot computes (~notLower) & 0x20: the case bit for letters, zero elsewhere.
        Vector256<byte> caseBit = Avx2.AndNot(notLower.AsByte(), Vector256.Create((byte)0x20));
        return Avx2.Xor(vec, caseBit.AsUInt16());
    }

    /// <summary>
    /// Flips bit 0x20 of every byte whose value is in 'a'..'z', leaving all other
    /// bytes unchanged. For ASCII chars this is upper-casing.
    /// </summary>
    /// <remarks>
    /// The operation works on individual bytes, so the upper byte of a non-ASCII
    /// UTF-16 char is transformed the same way; the result is only meaningful as
    /// upper-casing for ASCII input.
    /// </remarks>
    /// <param name="vec">Eight UTF-16 code units.</param>
    /// <returns><paramref name="vec"/> with ASCII lower-case letters upper-cased.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector128<ushort> ToUpper128(Vector128<ushort> vec)
    {
        // Subtracting ('a' + 128) with wrap-around maps 'a'..'z' to the signed range -128..-103.
        Vector128<byte> shifted = Sse2.Subtract(vec.AsByte(), Vector128.Create((byte)('a' + 128)));
        // All-ones for every byte that is NOT a lower-case letter (signed value > -103).
        Vector128<sbyte> notLower = Sse2.CompareGreaterThan(shifted.AsSByte(), Vector128.Create((sbyte)(-128 + 25)));
        // AndNot computes (~notLower) & 0x20: the case bit for letters, zero elsewhere.
        Vector128<byte> caseBit = Sse2.AndNot(notLower.AsByte(), Vector128.Create((byte)0x20));
        return Sse2.Xor(vec, caseBit.AsUInt16());
    }

    /// <summary>Entry point: runs every benchmark in this class through BenchmarkDotNet.</summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        BenchmarkDotNet.Running.BenchmarkRunner.Run<Benchmarks>();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment