GrabYourPitchforks · June 25, 2024 20:38
diff --git a/utf8_charcount.cs b/utf8_charcount.cs
 using System;
 using System.Buffers;
 using System.Diagnostics;
 using System.IO;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 using System.Text;
 using System.Text.Unicode;
 using BenchmarkDotNet.Attributes;
 using BenchmarkDotNet.Jobs;
 using BenchmarkDotNet.Running;

 // Program entry point: hand the benchmark class to BenchmarkDotNet. The value
 // returned by Run is not used, so it is discarded explicitly.
 _ = BenchmarkRunner.Run<Utf8Experimental>();

 [SimpleJob(RuntimeMoniker.Net80)]
 public unsafe class Utf8Experimental
 {
    private byte[] _data;

    /// <summary>
    /// Loads the benchmark input into <c>_data</c> by reading the whole file
    /// into memory.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // Relative path: resolved against the process's current directory.
        const string corpusPath = "twitter.json";
        _data = File.ReadAllBytes(corpusPath);
    }

    /// <summary>
    /// Baseline measurement: asks <see cref="Encoding.UTF8"/> for the char count
    /// of the loaded data.
    /// </summary>
    /// <returns>The count reported by the encoding, widened to <c>nuint</c>.</returns>
    [Benchmark(Baseline = true)]
    public nuint CountChars_Net8()
    {
        int charCount = Encoding.UTF8.GetCharCount(_data);
        return (uint)charCount;
    }

    /// <summary>
    /// Decodes the loaded data to a string with <see cref="Encoding.UTF8"/>.
    /// The benchmark attribute below is currently commented out.
    /// </summary>
    /// <returns>The decoded string.</returns>
    // [Benchmark(Baseline = true)]
    public string GetString_Net8() => Encoding.UTF8.GetString(_data);

    /// <summary>
    /// Measures the experimental counting routine over the loaded data.
    /// </summary>
    /// <returns>
    /// The experimental routine's result, narrowed to 32 bits by the
    /// <c>(uint)</c> cast and then widened to <c>nuint</c>.
    /// </returns>
    [Benchmark]
    public nuint CountChars_Mod_B()
    {
        nuint counted = CountChars_AssumeValid_Entry_B(_data);
        return (uint)counted;
    }

    /// <summary>
    /// Decodes the loaded data to a string by first computing the char count with
    /// the experimental routine, then transcoding straight into a string of that
    /// length. If the count is larger than the byte length, or the transcode does
    /// not finish cleanly, the result comes from <see cref="Encoding.UTF8"/> instead.
    /// The benchmark attribute below is currently commented out.
    /// </summary>
    /// <returns>The decoded string.</returns>
    // [Benchmark]
    public string GetString_Mod_B()
    {
        nuint expectedCharCount = CountChars_AssumeValid_Entry_B(_data);

        // A count above the byte length is not attempted on the fast path.
        if (expectedCharCount > (uint)_data.Length)
        {
            return Encoding.UTF8.GetString(_data);
        }

        // The callback is static and cannot capture locals, so the outcome is
        // reported back through a pointer to this stack local carried in the state.
        bool transcodedCleanly = false;
        IntPtr outcomeAddress = (IntPtr)(&transcodedCleanly);

        string result = string.Create((int)expectedCharCount, (data: _data, successPtr: outcomeAddress), static (chars, state) =>
        {
            OperationStatus status = Utf8.ToUtf16(state.data, chars, out int consumed, out int produced, replaceInvalidSequences: false);

            // Success means: finished, every input byte used, every output slot filled.
            bool ok = status == OperationStatus.Done
                && consumed == state.data.Length
                && produced == chars.Length;
            *(bool*)state.successPtr = ok;
        });

        // something went wrong - fall back to old logic instead
        return transcodedCleanly ? result : Encoding.UTF8.GetString(_data);
    }

    /// <summary>
    /// Span-based front end for the vectorized counter: returns 0 for an empty
    /// span, otherwise pins the span and forwards its address and length.
    /// </summary>
    /// <param name="buffer">Bytes to examine.</param>
    /// <returns>0 for empty input; otherwise the vectorized counter's result.</returns>
    private static nuint CountChars_AssumeValid_Entry_B(ReadOnlySpan<byte> buffer)
    {
        // The worker asserts a non-zero length, so empty input is answered here.
        if (buffer.Length == 0)
        {
            return 0;
        }

        nuint byteCount = (uint)buffer.Length;

        fixed (byte* pFirstByte = buffer)
        {
            return CountChars_AssumeValid_Avx2_B(pFirstByte, byteCount);
        }
    }

    /// <summary>
    /// Tallies how many UTF-16 code units the UTF-8 bytes in
    /// [<paramref name="pbData"/>, <paramref name="pbData"/> + <paramref name="cbData"/>)
    /// correspond to, without validating them.
    /// </summary>
    /// <remarks>
    /// The data is consumed in 32-byte stripes using aligned loads. The first and
    /// last stripes can cover bytes outside the caller's range; those lanes are
    /// masked off before counting. Only stripes that contain at least one byte of
    /// the caller's range are loaded.
    /// </remarks>
    /// <param name="pbData">Address of the first byte. Asserted non-null.</param>
    /// <param name="cbData">Number of bytes to examine. Asserted greater than zero.</param>
    /// <returns>The sum of the per-byte contributions described in the body.</returns>
    private static nuint CountChars_AssumeValid_Avx2_B(byte* pbData, nuint cbData)
    {
        Debug.Assert(pbData != null);
        Debug.Assert(cbData > 0);
        Debug.Assert(Avx2.X64.IsSupported);
        Debug.Assert(Popcnt.X64.IsSupported);

        // General logic: every byte contributes 0, 1 or 2 to the total.
        //  - The signed compare "byte > 0xC0" is true for [00..7F] and [C1..FF],
        //    i.e. ASCII and lead bytes; those contribute 1. Continuation bytes
        //    [80..BF] contribute 0. (0xC0 itself also compares false; it does not
        //    occur in well-formed UTF-8.)
        //  - The unsigned saturating subtract of 0x70 leaves the top bit set only
        //    for [F0..FF], the lead bytes of astral-plane (surrogate pair)
        //    sequences; those contribute one more, for 2 in total.

        Vector256<byte> vecC0 = Vector256.Create((byte)0xC0);
        Vector256<byte> vec70 = Vector256.Create((byte)0x70);

        nuint alignmentMask = (nuint)(Vector256<byte>.Count - 1);
        nuint cumulativeUtf16Chars = 0;

        byte* pJustPastEndOfData = pbData + cbData; // can't overflow: the range is addressable memory

        // First stripe: the aligned stripe holding the first byte. Last stripe: the
        // aligned stripe holding the FINAL VALID byte. Rounding down the
        // one-past-the-end address instead would, for data ending exactly on a
        // stripe boundary, name a stripe that lies wholly outside the buffer.
        byte* pAlignedReadStart = (byte*)((nuint)pbData & ~alignmentMask);
        byte* pAlignedReadEnd = (byte*)((nuint)(pJustPastEndOfData - 1) & ~alignmentMask);

        // Lanes of the first stripe that precede pbData are discarded.
        int numPrefixBytesToDiscard = (int)((nuint)pbData & alignmentMask);
        uint startMask = uint.MaxValue << numPrefixBytesToDiscard;

        // Lanes of the last stripe at or after the end of the data are discarded.
        // This is 0 when the data ends exactly on a stripe boundary.
        int numTailBytesInLastStripe = (int)((nuint)pJustPastEndOfData & alignmentMask);
        int numSuffixBytesToDiscard = (Vector256<byte>.Count - numTailBytesInLastStripe) & (Vector256<byte>.Count - 1);
        uint finalMask = uint.MaxValue >> numSuffixBytesToDiscard;

        // Read the first stripe. This may read data before the start of the
        // buffer, which the start mask removes.

        Vector256<byte> thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);

        uint nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecC0.AsSByte()));
        uint astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

        uint firstStripeMask = startMask;
        if (pAlignedReadStart == pAlignedReadEnd)
        {
            // The whole range lives in a single stripe: trim both ends.
            firstStripeMask &= finalMask;
        }

        cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap & firstStripeMask);
        cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap & firstStripeMask);

        // Now read the rest of the data in a loop.

        while (pAlignedReadStart < pAlignedReadEnd)
        {
            pAlignedReadStart += Vector256<byte>.Count;
            thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);

            nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecC0.AsSByte()));
            astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

            // On the last stripe, discard any lanes past the end of the data.

            if (pAlignedReadStart == pAlignedReadEnd)
            {
                nonContinuationBytesBitmap &= finalMask;
                astralLeadBytesBitmap &= finalMask;
            }

            cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap);
            cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap);
        }

        return cumulativeUtf16Chars;
    }
 }
	using System;
	using System.Buffers;
	using System.Diagnostics;
	using System.IO;
	using System.Runtime.InteropServices;
	using System.Runtime.Intrinsics;
	using System.Runtime.Intrinsics.X86;
	using System.Text;
	using System.Text.Unicode;
	using BenchmarkDotNet.Attributes;
	using BenchmarkDotNet.Jobs;
	using BenchmarkDotNet.Running;

	// Program entry point: hand the benchmark class to BenchmarkDotNet. The value
	// returned by Run is not used, so it is discarded explicitly.
	_ = BenchmarkRunner.Run<Utf8Experimental>();

	[SimpleJob(RuntimeMoniker.Net80)]
	public unsafe class Utf8Experimental
	{
	private byte[] _data;

	/// <summary>
	/// Loads the benchmark input into <c>_data</c> by reading the whole file
	/// into memory.
	/// </summary>
	[GlobalSetup]
	public void Setup()
	{
	    // Relative path: resolved against the process's current directory.
	    const string corpusPath = "twitter.json";
	    _data = File.ReadAllBytes(corpusPath);
	}

	/// <summary>
	/// Baseline measurement: asks <see cref="Encoding.UTF8"/> for the char count
	/// of the loaded data.
	/// </summary>
	/// <returns>The count reported by the encoding, widened to <c>nuint</c>.</returns>
	[Benchmark(Baseline = true)]
	public nuint CountChars_Net8()
	{
	    int charCount = Encoding.UTF8.GetCharCount(_data);
	    return (uint)charCount;
	}

	/// <summary>
	/// Decodes the loaded data to a string with <see cref="Encoding.UTF8"/>.
	/// The benchmark attribute below is currently commented out.
	/// </summary>
	/// <returns>The decoded string.</returns>
	// [Benchmark(Baseline = true)]
	public string GetString_Net8() => Encoding.UTF8.GetString(_data);

	/// <summary>
	/// Measures the experimental counting routine over the loaded data.
	/// </summary>
	/// <returns>
	/// The experimental routine's result, narrowed to 32 bits by the
	/// <c>(uint)</c> cast and then widened to <c>nuint</c>.
	/// </returns>
	[Benchmark]
	public nuint CountChars_Mod_B()
	{
	    nuint counted = CountChars_AssumeValid_Entry_B(_data);
	    return (uint)counted;
	}

	/// <summary>
	/// Decodes the loaded data to a string by first computing the char count with
	/// the experimental routine, then transcoding straight into a string of that
	/// length. If the count is larger than the byte length, or the transcode does
	/// not finish cleanly, the result comes from <see cref="Encoding.UTF8"/> instead.
	/// The benchmark attribute below is currently commented out.
	/// </summary>
	/// <returns>The decoded string.</returns>
	// [Benchmark]
	public string GetString_Mod_B()
	{
	    nuint stringLength = CountChars_AssumeValid_Entry_B(_data);
	    if (stringLength <= (uint)_data.Length)
	    {
	        // The callback is static and cannot capture locals, so the outcome is
	        // reported back through a pointer to this stack local carried in the state.
	        bool success = false;
	        string str = string.Create((int)stringLength, (data: _data, successPtr: (IntPtr)(&success)), static (chars, state) =>
	        {
	            var opStatus = Utf8.ToUtf16(state.data, chars, out int bytesRead, out int charsWritten, replaceInvalidSequences: false);

	            // Write through the pointer: the state carries the address of the
	            // caller's flag, so it must be cast back to bool* and dereferenced.
	            *((bool*)state.successPtr) = (opStatus == OperationStatus.Done) && (bytesRead == state.data.Length) && (charsWritten == chars.Length);
	        });

	        if (success)
	        {
	            return str;
	        }
	    }

	    // something went wrong - fall back to old logic instead
	    return Encoding.UTF8.GetString(_data);
	}

	/// <summary>
	/// Span-based front end for the vectorized counter: returns 0 for an empty
	/// span, otherwise pins the span and forwards its address and length.
	/// </summary>
	/// <param name="buffer">Bytes to examine.</param>
	/// <returns>0 for empty input; otherwise the vectorized counter's result.</returns>
	private static nuint CountChars_AssumeValid_Entry_B(ReadOnlySpan<byte> buffer)
	{
	    // The worker asserts a non-zero length, so empty input is answered here.
	    if (buffer.Length == 0)
	    {
	        return 0;
	    }

	    nuint byteCount = (uint)buffer.Length;

	    fixed (byte* pFirstByte = buffer)
	    {
	        return CountChars_AssumeValid_Avx2_B(pFirstByte, byteCount);
	    }
	}

	/// <summary>
	/// Tallies how many UTF-16 code units the UTF-8 bytes in
	/// [<paramref name="pbData"/>, <paramref name="pbData"/> + <paramref name="cbData"/>)
	/// correspond to, without validating them.
	/// </summary>
	/// <remarks>
	/// The data is consumed in 32-byte stripes using aligned loads. The first and
	/// last stripes can cover bytes outside the caller's range; those lanes are
	/// masked off before counting. Only stripes that contain at least one byte of
	/// the caller's range are loaded.
	/// </remarks>
	/// <param name="pbData">Address of the first byte. Asserted non-null.</param>
	/// <param name="cbData">Number of bytes to examine. Asserted greater than zero.</param>
	/// <returns>The sum of the per-byte contributions described in the body.</returns>
	private static nuint CountChars_AssumeValid_Avx2_B(byte* pbData, nuint cbData)
	{
	    Debug.Assert(pbData != null);
	    Debug.Assert(cbData > 0);
	    Debug.Assert(Avx2.X64.IsSupported);
	    Debug.Assert(Popcnt.X64.IsSupported);

	    // General logic: every byte contributes 0, 1 or 2 to the total.
	    //  - The signed compare "byte > 0xC0" is true for [00..7F] and [C1..FF],
	    //    i.e. ASCII and lead bytes; those contribute 1. Continuation bytes
	    //    [80..BF] contribute 0. (0xC0 itself also compares false; it does not
	    //    occur in well-formed UTF-8.)
	    //  - The unsigned saturating subtract of 0x70 leaves the top bit set only
	    //    for [F0..FF], the lead bytes of astral-plane (surrogate pair)
	    //    sequences; those contribute one more, for 2 in total.

	    Vector256<byte> vecC0 = Vector256.Create((byte)0xC0);
	    Vector256<byte> vec70 = Vector256.Create((byte)0x70);

	    nuint alignmentMask = (nuint)(Vector256<byte>.Count - 1);
	    nuint cumulativeUtf16Chars = 0;

	    byte* pJustPastEndOfData = pbData + cbData; // can't overflow: the range is addressable memory

	    // First stripe: the aligned stripe holding the first byte. Last stripe: the
	    // aligned stripe holding the FINAL VALID byte. Rounding down the
	    // one-past-the-end address instead would, for data ending exactly on a
	    // stripe boundary, name a stripe that lies wholly outside the buffer.
	    byte* pAlignedReadStart = (byte*)((nuint)pbData & ~alignmentMask);
	    byte* pAlignedReadEnd = (byte*)((nuint)(pJustPastEndOfData - 1) & ~alignmentMask);

	    // Lanes of the first stripe that precede pbData are discarded.
	    int numPrefixBytesToDiscard = (int)((nuint)pbData & alignmentMask);
	    uint startMask = uint.MaxValue << numPrefixBytesToDiscard;

	    // Lanes of the last stripe at or after the end of the data are discarded.
	    // This is 0 when the data ends exactly on a stripe boundary.
	    int numTailBytesInLastStripe = (int)((nuint)pJustPastEndOfData & alignmentMask);
	    int numSuffixBytesToDiscard = (Vector256<byte>.Count - numTailBytesInLastStripe) & (Vector256<byte>.Count - 1);
	    uint finalMask = uint.MaxValue >> numSuffixBytesToDiscard;

	    // Read the first stripe. This may read data before the start of the
	    // buffer, which the start mask removes.

	    Vector256<byte> thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);

	    uint nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecC0.AsSByte()));
	    uint astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

	    uint firstStripeMask = startMask;
	    if (pAlignedReadStart == pAlignedReadEnd)
	    {
	        // The whole range lives in a single stripe: trim both ends.
	        firstStripeMask &= finalMask;
	    }

	    cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap & firstStripeMask);
	    cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap & firstStripeMask);

	    // Now read the rest of the data in a loop.

	    while (pAlignedReadStart < pAlignedReadEnd)
	    {
	        pAlignedReadStart += Vector256<byte>.Count;
	        thisStripe = Avx2.LoadAlignedVector256(pAlignedReadStart);

	        nonContinuationBytesBitmap = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(thisStripe.AsSByte(), vecC0.AsSByte()));
	        astralLeadBytesBitmap = (uint)Avx2.MoveMask(Avx2.SubtractSaturate(thisStripe, vec70));

	        // On the last stripe, discard any lanes past the end of the data.

	        if (pAlignedReadStart == pAlignedReadEnd)
	        {
	            nonContinuationBytesBitmap &= finalMask;
	            astralLeadBytesBitmap &= finalMask;
	        }

	        cumulativeUtf16Chars += (uint)Popcnt.PopCount(nonContinuationBytesBitmap);
	        cumulativeUtf16Chars += (uint)Popcnt.PopCount(astralLeadBytesBitmap);
	    }

	    return cumulativeUtf16Chars;
	}
	}