Skip to content

Instantly share code, notes, and snippets.

Last active December 27, 2020 13:24
Show Gist options
  • Save EgorBo/8a4e4cda14eac0e605dd7bac68c56314 to your computer and use it in GitHub Desktop.
Save EgorBo/8a4e4cda14eac0e605dd7bac68c56314 to your computer and use it in GitHub Desktop.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
// Type that declares the program's entry point.
// NOTE(review): the braces and the body of Main are not present in this copy of the file;
// restore them from the original source before compiling.
public class Program
// Process entry point; `args` carries the command-line arguments.
public static void Main(string[] args)
/// <summary>
/// Prefix checks against the 4-character literal <c>"http"</c>: the span-based library
/// calls next to hand-unrolled versions that compare the first four UTF-16 code units
/// with a single 64-bit load.
/// </summary>
/// <remarks>
/// NOTE(review): only one <c>[Benchmark]</c> attribute and no argument sources are visible in
/// this copy of the file; confirm the remaining attributes against the original source.
/// </remarks>
public class SmallStringsBenchmarks
{
    /// <summary>Ordinal, case-sensitive prefix test through the span API.</summary>
    /// <param name="str">Input to test; a null string yields an empty span and therefore <c>false</c>.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http".</returns>
    public bool SpanStartsWith(string str) => str.AsSpan().StartsWith("http");

    // This should be almost as fast as SpanStartsWith_unrolled in future

    /// <summary>Case-insensitive prefix test using <see cref="StringComparison.OrdinalIgnoreCase"/>.</summary>
    /// <param name="str">Input to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http" under ordinal, case-insensitive rules.</returns>
    public bool SpanStartsWith_IgnoreCaseOrdinal(string str) =>
        str.AsSpan().StartsWith("http", StringComparison.OrdinalIgnoreCase);

    /// <summary>Case-insensitive prefix test using <see cref="StringComparison.InvariantCultureIgnoreCase"/>.</summary>
    /// <param name="str">Input to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with "http" under invariant-culture, case-insensitive rules.</returns>
    public bool SpanStartsWith_IgnoreCaseInvariant(string str) =>
        str.AsSpan().StartsWith("http", StringComparison.InvariantCultureIgnoreCase);

    /// <summary>
    /// Case-sensitive prefix test done with one unaligned 64-bit load over the first four chars.
    /// </summary>
    /// <param name="str">Input to test; a null string yields an empty span and therefore <c>false</c>.</param>
    /// <returns><c>true</c> when the first four chars are exactly 'h','t','t','p'.</returns>
    [Benchmark(Baseline = true)]
    public bool SpanStartsWith_unrolled(string str)
    {
        var span = str.AsSpan();

        // The following code emulates what JIT emits (don't worry, final codegen is not that big)
        // 0x0070_0074_0074_0068 is 'h','t','t','p' as four UTF-16 code units packed in
        // little-endian order, so the comparison is only valid on little-endian targets.
        // The length check must come first: it guarantees the 8-byte read stays inside the string.
        return span.Length >= 4 &&
            Unsafe.ReadUnaligned<ulong>(
                ref Unsafe.As<char, byte>(
                    ref MemoryMarshal.GetReference(span))) == 0x70007400740068UL;
    }

    /// <summary>
    /// ASCII case-insensitive prefix test done with one unaligned 64-bit load over the first four chars.
    /// </summary>
    /// <param name="str">Input to test; a null string yields an empty span and therefore <c>false</c>.</param>
    /// <returns><c>true</c> when the first four chars spell "http" in any mix of upper and lower case.</returns>
    public bool SpanStartsWith_IgnoreCase_unrolled(string str)
    {
        var span = str.AsSpan();

        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        // | 0x20 is enough here since the target string belongs to `[A..z]` only:
        // setting bit 5 of every char folds an ASCII upper-case letter onto its lower-case form.
        return span.Length >= 4 &&
            (Unsafe.ReadUnaligned<ulong>(
                ref Unsafe.As<char, byte>(
                    ref MemoryMarshal.GetReference(span))) | 0x0020002000200020UL) == 0x70007400740068UL;
    }
}
/// <summary>
/// Prefix checks against the 23-character literal <c>"ProxyAuthenticateHeader"</c>: the
/// span-based library calls next to hand-unrolled versions that cover the prefix with two
/// overlapping 256-bit loads.
/// </summary>
/// <remarks>
/// NOTE(review): only one <c>[Benchmark]</c> attribute and no argument sources are visible in
/// this copy of the file; confirm the remaining attributes against the original source.
/// </remarks>
public class LargeStringsBenchmarks
{
    // The literal every method in this class tests for.
    private const string Prefix = "ProxyAuthenticateHeader";

    /// <summary>Ordinal, case-sensitive prefix test through the span API.</summary>
    /// <param name="str">Input to test; a null string yields an empty span and therefore <c>false</c>.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix.</returns>
    public bool SpanStartsWith(string str) => str.AsSpan().StartsWith(Prefix);

    /// <summary>Case-insensitive prefix test using <see cref="StringComparison.OrdinalIgnoreCase"/>.</summary>
    /// <param name="str">Input to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix under ordinal, case-insensitive rules.</returns>
    public bool SpanStartsWith_IgnoreCaseOrdinal(string str) =>
        str.AsSpan().StartsWith(Prefix, StringComparison.OrdinalIgnoreCase);

    /// <summary>Case-insensitive prefix test using <see cref="StringComparison.InvariantCultureIgnoreCase"/>.</summary>
    /// <param name="str">Input to test.</param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix under invariant-culture, case-insensitive rules.</returns>
    public bool SpanStartsWith_IgnoreCaseInvariant(string str) =>
        str.AsSpan().StartsWith(Prefix, StringComparison.InvariantCultureIgnoreCase);

    /// <summary>
    /// Case-sensitive prefix test that XORs two overlapping 16-char windows of the input
    /// against the matching windows of the prefix and checks that no bit differs.
    /// </summary>
    /// <param name="str">
    /// Input to test. A null string is treated as empty and yields <c>false</c>, matching the
    /// span-based methods of this class.
    /// </param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix.</returns>
    [Benchmark(Baseline = true)]
    public bool SpanStartsWith_unrolled(string str)
    {
        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        ReadOnlySpan<char> span = str.AsSpan();
        if (span.Length < Prefix.Length)
        {
            return false;
        }

        // The intrinsics below throw on hardware without AVX2; use the library call there.
        if (!Avx2.IsSupported)
        {
            return span.StartsWith(Prefix);
        }

        // The second window must end on the last char of the PREFIX (offset 23 - 16 = 7),
        // not on the last char of the input: anchoring it to the input length compares the
        // input's tail and rejects every input longer than the prefix.
        int tailOffset = Prefix.Length - Vector256<ushort>.Count;

        ref char spanStart = ref MemoryMarshal.GetReference(span);
        var v1 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, tailOffset)));

        // Split "ProxyAuthenticateHeader" into two 32-bytes vectors: "ProxyAuthenticat" and "thenticateHeader"
        // NOTE: they overlap! because total length is 23 chars, not 32.
        var vec = Avx2.Or(
            Avx2.Xor(v1, Vector256.Create('P', 'r', 'o', 'x', 'y', 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't')),
            Avx2.Xor(v2, Vector256.Create('t', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't', 'e', 'H', 'e', 'a', 'd', 'e', 'r')));

        // All-zero after XOR/OR means both windows matched.
        return Avx.TestZ(vec, vec);
    }

    // Codegen listing kept from the original notes. It was recorded while the second load
    // was still positioned relative to the input length (the add/movsxd pair below); with
    // the fixed offset used above that address computation is expected to become a constant.
    //
    //   vmovupd ymm0, ymmword ptr[rdi]
    //   add esi, -16
    //   movsxd rax, esi
    //   vmovupd ymm1, ymmword ptr[rdi+2*rax]
    //   vpxor ymm0, ymm0, ymmword ptr[reloc @RWD00]
    //   vpxor ymm1, ymm1, ymmword ptr[reloc @RWD32]
    //   vpor ymm0, ymm0, ymm1
    //   vpmovmskb eax, ymm0
    //   test eax, eax
    //   sete al
    //   movzx rax, al

    /// <summary>
    /// ASCII case-insensitive variant of <see cref="SpanStartsWith_unrolled"/>: every input
    /// char is OR-ed with 0x20 before it is compared with the lower-cased prefix windows.
    /// </summary>
    /// <param name="str">
    /// Input to test. A null string is treated as empty and yields <c>false</c>, matching the
    /// span-based methods of this class.
    /// </param>
    /// <returns><c>true</c> when <paramref name="str"/> starts with the prefix in any mix of upper and lower case.</returns>
    public bool SpanStartsWith_IgnoreCase_unrolled(string str)
    {
        // The following code emulates what JIT is going to emit (don't worry, final codegen is not that big)
        ReadOnlySpan<char> span = str.AsSpan();
        if (span.Length < Prefix.Length)
        {
            return false;
        }

        // The intrinsics below throw on hardware without AVX2; use the library call there.
        // NOTE(review): OrdinalIgnoreCase also folds non-ASCII letters, so this path may differ
        // from the vector path for exotic inputs — confirm that is acceptable.
        if (!Avx2.IsSupported)
        {
            return span.StartsWith(Prefix, StringComparison.OrdinalIgnoreCase);
        }

        // Same window placement as the case-sensitive version: the second window is anchored
        // to the end of the prefix (offset 7), never to the end of the input.
        int tailOffset = Prefix.Length - Vector256<ushort>.Count;

        ref char spanStart = ref MemoryMarshal.GetReference(span);
        var v1 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, tailOffset)));

        // Setting bit 5 folds ASCII upper case onto lower case; that is sufficient because the
        // prefix consists of ASCII letters only.
        var caseBit = Vector256.Create((ushort)0x0020);
        var vec = Avx2.Or(
            Avx2.Xor(Avx2.Or(v1, caseBit), Vector256.Create('p', 'r', 'o', 'x', 'y', 'a', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't')),
            Avx2.Xor(Avx2.Or(v2, caseBit), Vector256.Create('t', 'h', 'e', 'n', 't', 'i', 'c', 'a', 't', 'e', 'h', 'e', 'a', 'd', 'e', 'r')));

        // All-zero after XOR/OR means both windows matched.
        return Avx.TestZ(vec, vec);
    }

    // Codegen listing kept from the original notes. It was recorded while the second load
    // was still positioned relative to the input length (the add/movsxd pair below); with
    // the fixed offset used above that address computation is expected to become a constant.
    //
    //   vmovupd ymm0, ymmword ptr[rdi]
    //   add esi, -16
    //   movsxd rax, esi
    //   vmovupd ymm1, ymmword ptr[rdi+2*rax]
    //   vpor ymm0, ymm0, ymmword ptr[reloc @RWD00]
    //   vpxor ymm0, ymm0, ymmword ptr[reloc @RWD32]
    //   vpor ymm1, ymm1, ymmword ptr[reloc @RWD00]
    //   vpxor ymm1, ymm1, ymmword ptr[reloc @RWD64]
    //   vpor ymm0, ymm0, ymm1
    //   vpmovmskb eax, ymm0
    //   test eax, eax
    //   sete al
    //   movzx rax, al
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment