Skip to content

Instantly share code, notes, and snippets.

@hypeartist
Last active July 20, 2022 01:06
Show Gist options
  • Save hypeartist/a9f7e1cc0c3a2775d9e9f427bc4b3c8e to your computer and use it in GitHub Desktop.
Save hypeartist/a9f7e1cc0c3a2775d9e9f427bc4b3c8e to your computer and use it in GitHub Desktop.
Count the .NET char instances contained in a UTF-8 byte array (rev2)
/// <summary>
/// Benchmark entry point for the hand-written scalar counter; forwards the shared source
/// buffer and its length to <c>GetCharInstanceCountCustomScalarImpl</c>.
/// </summary>
/// <returns>The value returned by the scalar implementation.</returns>
[Benchmark]
public int GetCharInstanceCountCustomScalar() =>
    GetCharInstanceCountCustomScalarImpl(_srcBuffer, _srcBufferlength);
/// <summary>
/// Benchmark entry point for the SSE2 counter; forwards the shared source buffer and its
/// length to <c>GetCharInstanceCountCustomSse2Impl</c>.
/// </summary>
/// <returns>The value returned by the SSE2 implementation.</returns>
[Benchmark]
public int GetCharInstanceCountCustomSse2() =>
    GetCharInstanceCountCustomSse2Impl(_srcBuffer, _srcBufferlength);
/// <summary>
/// Benchmark entry point for the AVX2 counter; forwards the shared source buffer and its
/// length to <c>GetCharInstanceCountCustomAvx2Impl</c>.
/// </summary>
/// <returns>The value returned by the AVX2 implementation.</returns>
[Benchmark]
public int GetCharInstanceCountCustomAvx2() =>
    GetCharInstanceCountCustomAvx2Impl(_srcBuffer, _srcBufferlength);
//
/// <summary>
/// Baseline benchmark: asks the framework's UTF-8 encoding for the char count of the
/// shared source buffer via <c>Encoding.UTF8.GetCharCount</c>.
/// </summary>
/// <returns>The value returned by <c>Encoding.UTF8.GetCharCount</c>.</returns>
[Benchmark]
public int GetCharInstanceCountNetCore1() =>
    Encoding.UTF8.GetCharCount(_srcBuffer, _srcBufferlength);
/// <summary>
/// Baseline benchmark: wraps the shared source buffer in a read-only span and passes it to
/// <c>GetUtf16CharCountFromKnownWellFormedUtf8</c>, which is defined outside this excerpt.
/// </summary>
/// <returns>The value returned by <c>GetUtf16CharCountFromKnownWellFormedUtf8</c>.</returns>
[Benchmark]
public int GetCharInstanceCountNetCore2()
{
    // Origin of the routine being measured:
    // https://github.com/GrabYourPitchforks/coreclr/pull/2
    var source = new ReadOnlySpan<byte>(_srcBuffer, _srcBufferlength);
    return GetUtf16CharCountFromKnownWellFormedUtf8(source);
}
//========================================================================================
// Pointer to the UTF-8 input that every benchmark method passes to its implementation.
// NOTE(review): where this buffer is allocated and filled is not visible in this excerpt — confirm.
private static byte* _srcBuffer;
// Length argument passed together with _srcBuffer by every benchmark method.
private static int _srcBufferlength;
/// <summary>
/// Branch-free positivity test: maps every value that is zero or negative to 0 and every
/// strictly positive value to 1.
/// </summary>
/// <param name="v">The value to classify.</param>
/// <returns>1 when <paramref name="v"/> is greater than zero; otherwise 0.</returns>
/// <remarks>
/// <c>~v &amp; -v</c> has its sign bit set only when <paramref name="v"/> is positive: for positive
/// values both operands are negative, for zero <c>-v</c> is zero, and for negative values
/// <c>~v</c> is non-negative. Unlike a test built on <c>v - 1</c>, nothing wraps for
/// <see cref="int.MinValue"/>, so that input correctly yields 0. The expression is evaluated
/// in an <c>unchecked</c> context so it behaves the same when the project enables overflow checks.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ZeroOrLessAsZeroOtherwiseAsOne(int v) => unchecked((int)((uint)(~v & -v) >> 31));
/// <summary>
/// Derives a byte length from the run of leading one bits in the low eight bits of
/// <paramref name="b"/>.
/// </summary>
/// <param name="b">The byte to inspect; only its low eight bits influence the result.</param>
/// <returns>
/// 1 when the low byte starts with zero or one set bits (for example 0x41 or 0x80), and the
/// number of leading set bits when there are two to seven of them (0xC3 gives 2, 0xE2 gives 3,
/// 0xF0 gives 4). The value 0xFF yields 32 because the inverted byte is zero.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetCharByteCountFromLeadingByte(int b)
{
    // Invert the byte and move it to the top of a 32-bit word so that counting leading
    // zeros of the result counts the leading ones of the original byte.
    var invertedInTopByte = (uint)~b << 24;
    var leadingOnesMinusOne = BitOperations.LeadingZeroCount(invertedInTopByte) - 1;

    // All ones when the value above is -1 (no leading one bits), all zeros otherwise;
    // XOR-ing with it turns -1 into 0 and leaves non-negative values untouched.
    var signMask = leadingOnesMinusOne >> 31;
    return (leadingOnesMinusOne ^ signMask) + 1;
}
/// <summary>
/// Assembles a code point from the four bytes of a four-byte UTF-8 sequence.
/// </summary>
/// <param name="b1">First byte; its low 3 bits become bits 18-20 of the result.</param>
/// <param name="b2">Second byte; its low 6 bits become bits 12-17.</param>
/// <param name="b3">Third byte; its low 6 bits become bits 6-11.</param>
/// <param name="b4">Fourth byte; its low 6 bits become bits 0-5.</param>
/// <returns>The combined 21-bit value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetFourBytesCharCodepoint(int b1, int b2, int b3, int b4)
{
    const int ContinuationPayloadMask = 0x3F;

    var codepoint = (b1 & 7) << 18;
    codepoint |= (b2 & ContinuationPayloadMask) << 12;
    codepoint |= (b3 & ContinuationPayloadMask) << 6;
    codepoint |= b4 & ContinuationPayloadMask;
    return codepoint;
}
/// <summary>
/// Counts the UTF-16 code units (.NET <c>char</c> instances) encoded by a UTF-8 buffer,
/// processing 16 bytes per iteration with SSE2 and the remaining bytes one at a time.
/// </summary>
/// <param name="utf8Data">Pointer to the first byte of the UTF-8 data.</param>
/// <param name="utf8DataSize">Number of bytes to examine; values of zero or less yield 0.</param>
/// <returns>
/// The number of bytes that are not continuation bytes (<c>10xxxxxx</c>) plus the number of
/// bytes whose upper nibble is <c>1111</c>. For well-formed UTF-8 this is one per encoded
/// character plus one extra for every four-byte sequence (which needs a surrogate pair).
/// </returns>
/// <remarks>
/// The tail is classified byte by byte with the same predicate as the vector loop. The
/// 16-byte boundary can fall inside a multi-byte sequence, so the tail may begin with
/// continuation bytes; decoding it as if it started on a lead byte would count those
/// bytes a second time, and stepping by a decoded length could run past the end of the buffer.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetCharInstanceCountCustomSse2Impl(byte* utf8Data, int utf8DataSize)
{
    // Nothing to count, and a negative size must never be turned into a pointer offset.
    if (utf8DataSize <= 0)
    {
        return 0;
    }

    var vLeadingBytesUpperHalfMask = Vector128.Create((byte)0b_1111_0000);
    var vHitMask = Vector128.Create((byte)0b_1100_0000);
    var vNotHitMask = Vector128.Create((byte)0b_1000_0000);
    var vConst0 = Vector128<byte>.Zero;
    var charCount = 0;

    var endAt = utf8Data + utf8DataSize;
    // Last position reachable in whole 16-byte steps.
    var stopAt = endAt - (utf8DataSize & (Vector128<byte>.Count - 1));

    while (utf8Data < stopAt)
    {
        var vUtf8Data = Sse2.LoadVector128(utf8Data);

        // (byte & 0xC0) == 0x80 marks continuation bytes; comparing that result with zero
        // inverts it, leaving a set lane for every byte that starts a character.
        var vLeadingBytesMask = Sse2.CompareEqual(Sse2.CompareEqual(Sse2.And(vUtf8Data, vHitMask), vNotHitMask), vConst0);
        charCount += BitOperations.PopCount((uint)Sse2.MoveMask(vLeadingBytesMask));

        // Bytes with upper nibble 1111 start four-byte sequences, which take two chars.
        var vFourBytesCharsLeadingBytesMask = Sse2.CompareEqual(Sse2.And(vUtf8Data, vLeadingBytesUpperHalfMask), vLeadingBytesUpperHalfMask);
        charCount += BitOperations.PopCount((uint)Sse2.MoveMask(vFourBytesCharsLeadingBytesMask));

        utf8Data += Vector128<byte>.Count;
    }

    while (utf8Data < endAt)
    {
        int currentByte = utf8Data[0];

        if ((currentByte & 0b_1100_0000) != 0b_1000_0000)
        {
            charCount++;
        }

        if ((currentByte & 0b_1111_0000) == 0b_1111_0000)
        {
            charCount++;
        }

        utf8Data++;
    }

    return charCount;
}
/// <summary>
/// Counts the UTF-16 code units (.NET <c>char</c> instances) encoded by a UTF-8 buffer,
/// processing 32 bytes per iteration with AVX2 and the remaining bytes one at a time.
/// </summary>
/// <param name="utf8Data">Pointer to the first byte of the UTF-8 data.</param>
/// <param name="utf8DataSize">Number of bytes to examine; values of zero or less yield 0.</param>
/// <returns>
/// The number of bytes that are not continuation bytes (<c>10xxxxxx</c>) plus the number of
/// bytes whose upper nibble is <c>1111</c>. For well-formed UTF-8 this is one per encoded
/// character plus one extra for every four-byte sequence (which needs a surrogate pair).
/// </returns>
/// <remarks>
/// The tail is classified byte by byte with the same predicate as the vector loop. The
/// 32-byte boundary can fall inside a multi-byte sequence, so the tail may begin with
/// continuation bytes; decoding it as if it started on a lead byte would count those
/// bytes a second time, and stepping by a decoded length could run past the end of the buffer.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetCharInstanceCountCustomAvx2Impl(byte* utf8Data, int utf8DataSize)
{
    // Nothing to count, and a negative size must never be turned into a pointer offset.
    if (utf8DataSize <= 0)
    {
        return 0;
    }

    var vLeadingBytesUpperHalfMask = Vector256.Create((byte)0b_1111_0000);
    var vHitMask = Vector256.Create((byte)0b_1100_0000);
    var vNotHitMask = Vector256.Create((byte)0b_1000_0000);
    var vConst0 = Vector256<byte>.Zero;
    var charCount = 0;

    var endAt = utf8Data + utf8DataSize;
    // Last position reachable in whole 32-byte steps.
    var stopAt = endAt - (utf8DataSize & (Vector256<byte>.Count - 1));

    while (utf8Data < stopAt)
    {
        var vUtf8Data = Avx.LoadVector256(utf8Data);

        // (byte & 0xC0) == 0x80 marks continuation bytes; comparing that result with zero
        // inverts it, leaving a set lane for every byte that starts a character.
        var vLeadingBytesMask = Avx2.CompareEqual(Avx2.CompareEqual(Avx2.And(vUtf8Data, vHitMask), vNotHitMask), vConst0);
        charCount += BitOperations.PopCount((uint)Avx2.MoveMask(vLeadingBytesMask));

        // Bytes with upper nibble 1111 start four-byte sequences, which take two chars.
        var vFourBytesCharsLeadingBytesMask = Avx2.CompareEqual(Avx2.And(vUtf8Data, vLeadingBytesUpperHalfMask), vLeadingBytesUpperHalfMask);
        charCount += BitOperations.PopCount((uint)Avx2.MoveMask(vFourBytesCharsLeadingBytesMask));

        utf8Data += Vector256<byte>.Count;
    }

    while (utf8Data < endAt)
    {
        int currentByte = utf8Data[0];

        if ((currentByte & 0b_1100_0000) != 0b_1000_0000)
        {
            charCount++;
        }

        if ((currentByte & 0b_1111_0000) == 0b_1111_0000)
        {
            charCount++;
        }

        utf8Data++;
    }

    return charCount;
}
/// <summary>
/// Walks a UTF-8 buffer one encoded sequence at a time and totals the UTF-16 code units
/// (.NET <c>char</c> instances) it represents.
/// </summary>
/// <param name="utf8Data">Pointer to the first byte; it is treated as the start of a sequence.</param>
/// <param name="utf8DataSize">Number of bytes to walk.</param>
/// <returns>
/// The sum over visited bytes of 2 when the byte's upper nibble is <c>1111</c> and 1 otherwise,
/// where the step to the next visited byte comes from <c>GetCharByteCountFromLeadingByte</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetCharInstanceCountCustomScalarImpl(byte* utf8Data, int utf8DataSize)
{
    var limit = utf8Data + utf8DataSize;
    var utf16Units = 0;

    for (var cursor = utf8Data; cursor < limit;)
    {
        var lead = cursor[0];

        // Zero exactly when the upper nibble is 1111, i.e. the byte opens a four-byte sequence.
        var distanceFromFourByteLead = ((lead >> 4) & 0b1111) ^ 0b1111;

        // The helper yields 0 for that case and 1 otherwise; flipping the low bit and adding
        // one gives 2 chars for a four-byte sequence and 1 char for everything else.
        utf16Units += (ZeroOrLessAsZeroOtherwiseAsOne(distanceFromFourByteLead) ^ 1) + 1;

        cursor += GetCharByteCountFromLeadingByte(lead);
    }

    return utf16Units;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment