EgorBo · December 27, 2021 20:16
diff --git a/FasterHttpHeadersParse.cs b/FasterHttpHeadersParse.cs

 using System.Diagnostics;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 using System.Text;

 public class Prog
 {
    private const byte ByteCR = (byte)'\r';
    private const byte ByteLF = (byte)'\n';
    private const byte ByteColon = (byte)':';
    private const byte ByteSpace = (byte)' ';
    private const byte ByteTab = (byte)'\t';

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector128<byte> LoadVector128(ref byte start, nuint offset)
        => Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref start, offset));

    /// <summary>
    /// </summary>
    /// <param name="handler">to report final key and value</param>
    /// <param name="span">data to process</param>
    /// <returns>position of CRLF, -1 in case of invalid header</returns>
    private static int TryTakeSingleHeaderFast(TRequestHandler handler, ReadOnlySpan<byte> span)
    {
        Debug.Assert(span.Length >= 16 && Sse2.IsSupported);

        // Rules:
        // 1) ends with CRLF, doesn't contain CR or LF inside
        // 2) key is not empty, should not contain ' ' and '\t'
        // 3) value can be empty
        // 4) there can be multiple ' ' and '\t' around value, e.g.:
        //    "key: \t\t   value\t     \r\n"
        //    and we must trim them when reporting key and value to handler

        ref byte searchSpace = ref MemoryMarshal.GetReference(span);

        nuint length = (nuint)span.Length;
        Vector128<byte> vectorCR = Vector128.Create(ByteCR);
        Vector128<byte> vectorLF = Vector128.Create(ByteLF);
        Vector128<byte> vectorColon = Vector128.Create(ByteColon);
        Vector128<byte> vectorSpace = Vector128.Create(ByteSpace);
        Vector128<byte> vectorTab = Vector128.Create(ByteTab);

        nuint colonPosition = 0;
        nuint valueStarts = 0;
        nuint offset = 0;
        nuint finalLength = 0;

    NEXT:
        Vector128<byte> vector = LoadVector128(ref searchSpace, offset);
        int colonIndex = Sse2.MoveMask(Sse2.CompareEqual(vector, vectorColon));

        Vector128<byte> cmpCR = Sse2.CompareEqual(vector, vectorCR);
        Vector128<byte> cmpLF = Sse2.CompareEqual(vector, vectorLF);
        Vector128<byte> cmpSpace = Sse2.CompareEqual(vector, vectorSpace);
        Vector128<byte> cmpTab = Sse2.CompareEqual(vector, vectorTab);

        // colonPosition is not found yet
        if (colonPosition == 0)
        {
            Vector128<byte> spaceOrTab = Sse2.Or(cmpSpace, cmpTab);
            Vector128<byte> crOrLf = Sse2.Or(cmpCR, cmpLF);

            int crOrLfOrSpaceOrTab = Sse2.MoveMask(Sse2.Or(crOrLf, spaceOrTab));

            // no colon in the current vector - most likely we still process header's name
            if (colonIndex == 0)
            {
                // we don't expect to see any of ' ', '\t', '\r', '\n' till we find ':'
                if (crOrLfOrSpaceOrTab != 0)
                {
                    return -1;
                }

                offset += 16;
                if (offset == length)
                {
                    // unexpected end of span
                    return -1;
                }
                if (offset + 16 > length)
                {
                    // not enough space for the next 128bit vector so let's 
                    // overlap with the current one
                    offset = length - 16;
                }
                goto NEXT;
            }

            // we've found the ':' in the current vector, let's check where exactly
            nuint colonLocalPos = (nuint)BitOperations.TrailingZeroCount(colonIndex);

            // update colonPosition
            colonPosition = offset + colonLocalPos;

            // ':' is not expected to be the first symbol, also
            // none of ' ', '\t', '\r', '\n' are expected to be found before ':'
            if (colonPosition == 0 || colonLocalPos > (nuint)BitOperations.TrailingZeroCount(crOrLfOrSpaceOrTab))
            {
                return -1;
            }

            if (colonLocalPos == 15)
            {
                offset += 16;
                if (offset == length)
                {
                    // unexpected end of span
                    return -1;
                }
                if (offset + 16 > length)
                {
                    // not enough space for the next 128bit vector so let's 
                    // overlap with the current one
                    offset = length - 16;
                }
                goto NEXT;
            }
        }

        if (valueStarts == 0)
        {
            uint nonWhiteSpace = ~(uint)Sse2.MoveMask(Sse2.Or(cmpSpace, cmpTab));

            // Vector still contains ':' separator so we need to ignore it and the part of the 
            // header's name 
            if (offset <= colonPosition)
            {
                int toIgnore = (int)(colonPosition + 1 - offset);
                nonWhiteSpace = (nonWhiteSpace >> toIgnore) << toIgnore;
            }
            
            var posOfFirstNonWhitespace = (nuint)BitOperations.TrailingZeroCount(nonWhiteSpace);
            if (posOfFirstNonWhitespace == 16)
            {
                offset += 16;
                if (offset == length)
                {
                    // unexpected end of span
                    return -1;
                }
                if (offset + 16 > length)
                {
                    // not enough space for the next 128bit vector so let's 
                    // overlap with the current one
                    offset = length - 16;
                }
                goto NEXT;
            }
            valueStarts = offset + posOfFirstNonWhitespace;
        }

        // Now let's wait for CRLF
        nuint crPos = (nuint)Sse2.MoveMask(cmpCR);
        if (crPos == 0)
        {
            if ((nuint)Sse2.MoveMask(cmpLF) != 0)
                return -1;

            offset += 16;
            if (offset == length)
            {
                // unexpected end of span
                return -1;
            }
            if (offset + 16 > length)
            {
                // not enough space for the next 128bit vector so let's 
                // overlap with the current one
                offset = length - 16;
            }
            goto NEXT;
        }

        crPos = (nuint)BitOperations.TrailingZeroCount(crPos);
        if (crPos == 15)
        {
            // Rare case: CR was found in the last byte of the current vector
            // make sure current chunk doesn't have any LF and check the next byte directly
            if (Sse2.MoveMask(cmpLF) != 0) // use TestZ here?
            {
                return -1;
            }

            if (offset + 16 >= length || Unsafe.AddByteOffset(ref searchSpace, offset + 16) != ByteLF)
            {
                // either unexpected end of span or next byte after CR is not LF
                return -1;
            }

            finalLength = offset + 17;
            goto END;
        }

        var lfPos = (nuint)BitOperations.TrailingZeroCount(Sse2.MoveMask(cmpLF));
        if (crPos + 1 != lfPos)
        {
            // CR is either not followed by LF or LF was found earlier than expected
            return -1;
        }
        finalLength = offset + lfPos + 1;

    END:

        // Compatibility: Non-fast version of parser gives up on inputs like "1:\r\n" (however, it's fine with "1: \r\n")
        if (finalLength < 5)
            return -1;

        nuint valueEnds = finalLength - 3; // minus CRLF

        // Rare case: trailing spaces or tabs
        for (nuint i = valueEnds; i > valueStarts; i--)
        {
            // for most cases we expect this loop to fire just a single iteration
            var b = Unsafe.AddByteOffset(ref searchSpace, i);
            if ((b != ByteSpace) && (b != ByteTab))
            {
                break;
            }
            valueEnds--;
        }

        handler.OnHeader(span.Slice(0, (int)colonPosition), span.Slice((int)valueStarts, (int)(valueEnds + 1 - valueStarts)));

        return (int)finalLength;
    }

    static unsafe void Main()
    {
        int length = TryTakeSingleHeaderFast(new TRequestHandler(), Encoding.UTF8.GetBytes("Accept-Encoding: gzip, deflate, br\r\n"));
        Console.WriteLine(length);
        Console.ReadKey();
    }
 }

 class TRequestHandler
 {
    public void OnHeader(ReadOnlySpan<byte> name, ReadOnlySpan<byte> value)
    {
        Console.WriteLine($"Name: (Len={name.Length}) {Encoding.UTF8.GetString(name)}");
        Console.WriteLine($"Value: (Len={value.Length}) {Encoding.UTF8.GetString(value)}");
    }
 }

	using System.Diagnostics;
	using System.Numerics;
	using System.Runtime.CompilerServices;
	using System.Runtime.InteropServices;
	using System.Runtime.Intrinsics;
	using System.Runtime.Intrinsics.X86;
	using System.Text;

	public class Prog
	{
	private const byte ByteCR = (byte)'\r';
	private const byte ByteLF = (byte)'\n';
	private const byte ByteColon = (byte)':';
	private const byte ByteSpace = (byte)' ';
	private const byte ByteTab = (byte)'\t';

	[MethodImpl(MethodImplOptions.AggressiveInlining)]
	private static Vector128<byte> LoadVector128(ref byte start, nuint offset)
	=> Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref start, offset));

	/// <summary>
	/// </summary>
	/// <param name="handler">to report final key and value</param>
	/// <param name="span">data to process</param>
	/// <returns>position of CRLF, -1 in case of invalid header</returns>
	private static int TryTakeSingleHeaderFast(TRequestHandler handler, ReadOnlySpan<byte> span)
	{
	Debug.Assert(span.Length >= 16 && Sse2.IsSupported);

	// Rules:
	// 1) ends with CRLF, doesn't contain CR or LF inside
	// 2) key is not empty, should not contain ' ' and '\t'
	// 3) value can be empty
	// 4) there can be multiple ' ' and '\t' around value, e.g.:
	// "key: \t\t value\t \r\n"
	// and we must trim them when reporting key and value to handler

	ref byte searchSpace = ref MemoryMarshal.GetReference(span);

	nuint length = (nuint)span.Length;
	Vector128<byte> vectorCR = Vector128.Create(ByteCR);
	Vector128<byte> vectorLF = Vector128.Create(ByteLF);
	Vector128<byte> vectorColon = Vector128.Create(ByteColon);
	Vector128<byte> vectorSpace = Vector128.Create(ByteSpace);
	Vector128<byte> vectorTab = Vector128.Create(ByteTab);

	nuint colonPosition = 0;
	nuint valueStarts = 0;
	nuint offset = 0;
	nuint finalLength = 0;

	NEXT:
	Vector128<byte> vector = LoadVector128(ref searchSpace, offset);
	int colonIndex = Sse2.MoveMask(Sse2.CompareEqual(vector, vectorColon));

	Vector128<byte> cmpCR = Sse2.CompareEqual(vector, vectorCR);
	Vector128<byte> cmpLF = Sse2.CompareEqual(vector, vectorLF);
	Vector128<byte> cmpSpace = Sse2.CompareEqual(vector, vectorSpace);
	Vector128<byte> cmpTab = Sse2.CompareEqual(vector, vectorTab);

	// colonPosition is not found yet
	if (colonPosition == 0)
	{
	Vector128<byte> spaceOrTab = Sse2.Or(cmpSpace, cmpTab);
	Vector128<byte> crOrLf = Sse2.Or(cmpCR, cmpLF);

	int crOrLfOrSpaceOrTab = Sse2.MoveMask(Sse2.Or(crOrLf, spaceOrTab));

	// no colon in the current vector - most likely we still process header's name
	if (colonIndex == 0)
	{
	// we don't expect to see any of ' ', '\t', '\r', '\n' till we find ':'
	if (crOrLfOrSpaceOrTab != 0)
	{
	return -1;
	}

	offset += 16;
	if (offset == length)
	{
	// unexpected end of span
	return -1;
	}
	if (offset + 16 > length)
	{
	// not enough space for the next 128bit vector so let's
	// overlap with the current one
	offset = length - 16;
	}
	goto NEXT;
	}

	// we've found the ':' in the current vector, let's check where exactly
	nuint colonLocalPos = (nuint)BitOperations.TrailingZeroCount(colonIndex);

	// update colonPosition
	colonPosition = offset + colonLocalPos;

	// ':' is not expected to be the first symbol, also
	// none of ' ', '\t', '\r', '\n' are expected to be found before ':'
	if (colonPosition == 0 \|\| colonLocalPos > (nuint)BitOperations.TrailingZeroCount(crOrLfOrSpaceOrTab))
	{
	return -1;
	}

	if (colonLocalPos == 15)
	{
	offset += 16;
	if (offset == length)
	{
	// unexpected end of span
	return -1;
	}
	if (offset + 16 > length)
	{
	// not enough space for the next 128bit vector so let's
	// overlap with the current one
	offset = length - 16;
	}
	goto NEXT;
	}
	}

	if (valueStarts == 0)
	{
	uint nonWhiteSpace = ~(uint)Sse2.MoveMask(Sse2.Or(cmpSpace, cmpTab));

	// Vector still contains ':' separator so we need to ignore it and the part of the
	// header's name
	if (offset <= colonPosition)
	{
	int toIgnore = (int)(colonPosition + 1 - offset);
	nonWhiteSpace = (nonWhiteSpace >> toIgnore) << toIgnore;
	}

	var posOfFirstNonWhitespace = (nuint)BitOperations.TrailingZeroCount(nonWhiteSpace);
	if (posOfFirstNonWhitespace == 16)
	{
	offset += 16;
	if (offset == length)
	{
	// unexpected end of span
	return -1;
	}
	if (offset + 16 > length)
	{
	// not enough space for the next 128bit vector so let's
	// overlap with the current one
	offset = length - 16;
	}
	goto NEXT;
	}
	valueStarts = offset + posOfFirstNonWhitespace;
	}

	// Now let's wait for CRLF
	nuint crPos = (nuint)Sse2.MoveMask(cmpCR);
	if (crPos == 0)
	{
	if ((nuint)Sse2.MoveMask(cmpLF) != 0)
	return -1;

	offset += 16;
	if (offset == length)
	{
	// unexpected end of span
	return -1;
	}
	if (offset + 16 > length)
	{
	// not enough space for the next 128bit vector so let's
	// overlap with the current one
	offset = length - 16;
	}
	goto NEXT;
	}

	crPos = (nuint)BitOperations.TrailingZeroCount(crPos);
	if (crPos == 15)
	{
	// Rare case: CR was found in the last byte of the current vector
	// make sure current chunk doesn't have any LF and check the next byte directly
	if (Sse2.MoveMask(cmpLF) != 0) // use TestZ here?
	{
	return -1;
	}

	if (offset + 16 >= length \|\| Unsafe.AddByteOffset(ref searchSpace, offset + 16) != ByteLF)
	{
	// either unexpected end of span or next byte after CR is not LF
	return -1;
	}

	finalLength = offset + 17;
	goto END;
	}

	var lfPos = (nuint)BitOperations.TrailingZeroCount(Sse2.MoveMask(cmpLF));
	if (crPos + 1 != lfPos)
	{
	// CR is either not followed by LF or LF was found earlier than expected
	return -1;
	}
	finalLength = offset + lfPos + 1;

	END:

	// Compatibility: Non-fast version of parser gives up on inputs like "1:\r\n" (however, it's fine with "1: \r\n")
	if (finalLength < 5)
	return -1;

	nuint valueEnds = finalLength - 3; // minus CRLF

	// Rare case: trailing spaces or tabs
	for (nuint i = valueEnds; i > valueStarts; i--)
	{
	// for most cases we expect this loop to fire just a single iteration
	var b = Unsafe.AddByteOffset(ref searchSpace, i);
	if ((b != ByteSpace) && (b != ByteTab))
	{
	break;
	}
	valueEnds--;
	}

	handler.OnHeader(span.Slice(0, (int)colonPosition), span.Slice((int)valueStarts, (int)(valueEnds + 1 - valueStarts)));

	return (int)finalLength;
	}

	static unsafe void Main()
	{
	int length = TryTakeSingleHeaderFast(new TRequestHandler(), Encoding.UTF8.GetBytes("Accept-Encoding: gzip, deflate, br\r\n"));
	Console.WriteLine(length);
	Console.ReadKey();
	}
	}

	class TRequestHandler
	{
	public void OnHeader(ReadOnlySpan<byte> name, ReadOnlySpan<byte> value)
	{
	Console.WriteLine($"Name: (Len={name.Length}) {Encoding.UTF8.GetString(name)}");
	Console.WriteLine($"Value: (Len={value.Length}) {Encoding.UTF8.GetString(value)}");
	}
	}