Skip to content

Instantly share code, notes, and snippets.

@EgorBo
Last active December 27, 2020 13:24
Show Gist options
  • Save EgorBo/8a4e4cda14eac0e605dd7bac68c56314 to your computer and use it in GitHub Desktop.
Save EgorBo/8a4e4cda14eac0e605dd7bac68c56314 to your computer and use it in GitHub Desktop.
Span_StartsWith_Benchmark.cs
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
/// <summary>
/// Console entry point: hands the command-line arguments to BenchmarkDotNet's switcher.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the benchmarks found in the assembly that declares <see cref="SmallStringsBenchmarks"/>.
    /// </summary>
    /// <param name="args">Command-line arguments, forwarded unchanged to the benchmark switcher.</param>
    public static void Main(string[] args)
    {
        // The switcher is pointed at the assembly that contains SmallStringsBenchmarks.
        var benchmarksAssembly = typeof(SmallStringsBenchmarks).Assembly;
        var switcher = BenchmarkSwitcher.FromAssembly(benchmarksAssembly);
        switcher.Run(args);
    }
}
/// <summary>
/// Benchmarks for checking whether a short string starts with the 4-char prefix "http":
/// the library <c>StartsWith</c> calls versus hand-unrolled single 64-bit comparisons.
/// </summary>
public class SmallStringsBenchmarks
{
    /// <summary>Case-sensitive <c>StartsWith</c> without an explicit <see cref="StringComparison"/>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http".</returns>
    [Benchmark]
    [Arguments("https://google.com")]
    public bool SpanStartsWith(string str) => str.AsSpan().StartsWith("http");

    /// <summary>Case-insensitive ordinal <c>StartsWith</c>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http", ignoring case (ordinal rules).</returns>
    [Benchmark]
    [Arguments("https://google.com")]
    // This should be almost as fast as SpanStartsWith_unrolled in future
    // NOTE: the comparison mode now matches the method name (it previously used InvariantCultureIgnoreCase).
    public bool SpanStartsWith_IgnoreCaseOrdinal(string str) => str.AsSpan().StartsWith("http", StringComparison.OrdinalIgnoreCase);

    /// <summary>Case-insensitive invariant-culture <c>StartsWith</c>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http", ignoring case (invariant-culture rules).</returns>
    [Benchmark]
    [Arguments("https://google.com")]
    // NOTE: the comparison mode now matches the method name (it previously used OrdinalIgnoreCase).
    public bool SpanStartsWith_IgnoreCaseInvariant(string str) => str.AsSpan().StartsWith("http", StringComparison.InvariantCultureIgnoreCase);

    /// <summary>
    /// Hand-unrolled case-sensitive check: reads the first four UTF-16 chars as one 64-bit value
    /// and compares it with the packed constant for "http".
    /// </summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> has at least 4 chars and the first four are "http".</returns>
    [Benchmark(Baseline = true)]
    [Arguments("https://google.com")]
    public bool SpanStartsWith_unrolled(string str)
    {
        var span = str.AsSpan();
        // The following code emulates what JIT emits (don't worry, final codegen is not that big)
        // The length check must come first: the 8-byte read below covers exactly four chars.
        if (span.Length < 4)
        {
            return false;
        }

        // 'h' (0x68) sits in the lowest 16 bits and 'p' (0x70) in the highest, so the constant
        // equals the in-memory layout of "http" only when the read is little-endian.
        ulong firstFourChars = Unsafe.ReadUnaligned<ulong>(
            ref Unsafe.As<char, byte>(
                ref MemoryMarshal.GetReference(span)));
        return firstFourChars == 0x0070007400740068UL;
    }

    /// <summary>
    /// Hand-unrolled case-insensitive check: same 64-bit read as <see cref="SpanStartsWith_unrolled"/>,
    /// with bit 0x20 set in every char before comparing against lower-case "http".
    /// </summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when the first four chars of <paramref name="str"/> are "http" in any ASCII letter case.</returns>
    [Benchmark]
    [Arguments("https://google.com")]
    public bool SpanStartsWith_IgnoreCase_unrolled(string str)
    {
        var span = str.AsSpan();
        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        if (span.Length < 4)
        {
            return false;
        }

        ulong firstFourChars = Unsafe.ReadUnaligned<ulong>(
            ref Unsafe.As<char, byte>(
                ref MemoryMarshal.GetReference(span)));

        // | 0x20 is enough here since the target string consists of ASCII letters only:
        // the only chars that become a given lower-case letter after the OR are that letter
        // and its upper-case form.
        return (firstFourChars | 0x0020002000200020UL) == 0x0070007400740068UL;
    }
}
/// <summary>
/// Benchmarks for checking whether a string starts with the 23-char prefix
/// "ProxyAuthenticateHeader": the library <c>StartsWith</c> calls versus hand-unrolled
/// comparisons that use two overlapping 256-bit loads.
/// The unrolled methods call AVX/AVX2 intrinsics directly and therefore need hardware that supports them.
/// </summary>
public class LargeStringsBenchmarks
{
    // Prefix every benchmark in this class looks for.
    private const string Prefix = "ProxyAuthenticateHeader";

    // Number of UTF-16 chars that fit into one 256-bit vector.
    private const int CharsPerVector = 16;

    /// <summary>Case-sensitive <c>StartsWith</c> without an explicit <see cref="StringComparison"/>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix.</returns>
    [Benchmark]
    [Arguments("ProxyAuthenticateHeaderStr")]
    public bool SpanStartsWith(string str) => str.AsSpan().StartsWith(Prefix);

    /// <summary>Case-insensitive ordinal <c>StartsWith</c>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix, ignoring case (ordinal rules).</returns>
    [Benchmark]
    [Arguments("ProxyAuthenticateHeaderStr")]
    // NOTE: the comparison mode now matches the method name (it previously used InvariantCultureIgnoreCase).
    public bool SpanStartsWith_IgnoreCaseOrdinal(string str) => str.AsSpan().StartsWith(Prefix, StringComparison.OrdinalIgnoreCase);

    /// <summary>Case-insensitive invariant-culture <c>StartsWith</c>.</summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix, ignoring case (invariant-culture rules).</returns>
    [Benchmark]
    [Arguments("ProxyAuthenticateHeaderStr")]
    // NOTE: the comparison mode now matches the method name (it previously used OrdinalIgnoreCase).
    public bool SpanStartsWith_IgnoreCaseInvariant(string str) => str.AsSpan().StartsWith(Prefix, StringComparison.InvariantCultureIgnoreCase);

    /// <summary>
    /// Hand-unrolled case-sensitive prefix check using two overlapping 16-char vector loads.
    /// </summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> has at least 23 chars and the first 23 equal the prefix.</returns>
    [Benchmark(Baseline = true)]
    [Arguments("ProxyAuthenticateHeaderStr")]
    public bool SpanStartsWith_unrolled(string str)
    {
        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        if (str.Length < Prefix.Length)
        {
            return false;
        }

        ref char spanStart = ref MemoryMarshal.GetReference(str.AsSpan());

        // First load: chars [0..16) of the input.
        var head = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));

        // Second load: chars [7..23) of the input, i.e. the last 16 chars of the PREFIX region.
        // The offset must be relative to the prefix length, not to str.Length: an offset of
        // str.Length - 16 would compare the end of the input and reject every input longer
        // than the prefix (including this benchmark's own argument).
        var tail = Unsafe.ReadUnaligned<Vector256<ushort>>(
            ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, Prefix.Length - CharsPerVector)));

        // Split "ProxyAuthenticateHeader" into two 32-bytes vectors: "ProxyAuthenticat" and "thenticateHeader"
        // NOTE: they overlap! because total length is 23, not 32.
        // XOR yields zero for every matching lane; OR-ing both results is all-zero only on a full match.
        var vec = Avx2.Or(
            Avx2.Xor(head, Vector256.Create('P', 'r', 'o', 'x', 'y', 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't')),
            Avx2.Xor(tail, Vector256.Create('t', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't', 'e', 'H', 'e', 'a', 'd', 'e', 'r')));
        return Avx.TestZ(vec, vec);
        /*
        Illustrative listing from the original author. Its `add esi, -16` / `movsxd` pair
        corresponds to the former length-relative second load; with the prefix-relative
        offset used above, the second load address no longer depends on the input length.

        vmovupd ymm0, ymmword ptr[rdi]
        add esi, -16
        movsxd rax, esi
        vmovupd ymm1, ymmword ptr[rdi+2*rax]
        vpxor ymm0, ymm0, ymmword ptr[reloc @RWD00]
        vpxor ymm1, ymm1, ymmword ptr[reloc @RWD32]
        vpor ymm0, ymm0, ymm1
        vpmovmskb eax, ymm0
        test eax, eax
        sete al
        movzx rax, al
        */
    }

    /// <summary>
    /// Hand-unrolled case-insensitive prefix check: same two overlapping loads as
    /// <see cref="SpanStartsWith_unrolled"/>, with bit 0x20 set in every char before comparing
    /// against the lower-case prefix.
    /// </summary>
    /// <param name="str">Input string to test.</param>
    /// <returns><c>true</c> when the first 23 chars of <paramref name="str"/> equal the prefix in any ASCII letter case.</returns>
    [Benchmark]
    [Arguments("ProxyAuthenticateHeaderStr")]
    public bool SpanStartsWith_IgnoreCase_unrolled(string str)
    {
        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        if (str.Length < Prefix.Length)
        {
            return false;
        }

        ref char spanStart = ref MemoryMarshal.GetReference(str.AsSpan());

        // Chars [0..16) and chars [7..23) of the input; see SpanStartsWith_unrolled for why the
        // second offset is relative to the prefix length rather than to str.Length.
        var head = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var tail = Unsafe.ReadUnaligned<Vector256<ushort>>(
            ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, Prefix.Length - CharsPerVector)));

        // The prefix consists of ASCII letters only, so OR-ing 0x20 into every lane folds case:
        // only a letter or its other-case form can produce the expected lower-case value.
        var caseBit = Vector256.Create((ushort)0x0020);
        var vec = Avx2.Or(
            Avx2.Xor(Avx2.Or(head, caseBit), Vector256.Create('p', 'r', 'o', 'x', 'y', 'a', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't')),
            Avx2.Xor(Avx2.Or(tail, caseBit), Vector256.Create('t', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't', 'e', 'h', 'e', 'a', 'd', 'e', 'r')));
        return Avx.TestZ(vec, vec);
        /*
        Illustrative listing from the original author. Its `add esi, -16` / `movsxd` pair
        corresponds to the former length-relative second load; with the prefix-relative
        offset used above, the second load address no longer depends on the input length.

        vmovupd ymm0, ymmword ptr[rdi]
        add esi, -16
        movsxd rax, esi
        vmovupd ymm1, ymmword ptr[rdi+2*rax]
        vpor ymm0, ymm0, ymmword ptr[reloc @RWD00]
        vpxor ymm0, ymm0, ymmword ptr[reloc @RWD32]
        vpor ymm1, ymm1, ymmword ptr[reloc @RWD00]
        vpxor ymm1, ymm1, ymmword ptr[reloc @RWD64]
        vpor ymm0, ymm0, ymm1
        vpmovmskb eax, ymm0
        test eax, eax
        sete al
        movzx rax, al
        */
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment