SwapnilGaikwad · June 22, 2022 16:02
diff --git a/avoid_recomputing_moveMask.txt b/avoid_recomputing_moveMask.txt
 diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
 index 163d62208c6..864bcb58b2e 100644
 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
 +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
 @@ -794,6 +794,8 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // This method is written such that control generally flows top-to-bottom, avoiding
             // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII
             // data, we jump out of the hot paths to targets at the end of the method.
 +
 +            Vector128<ushort> asciiMaskForTestZ = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
             Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
             const uint NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether 'currentMask' contains non-ASCII data
 
 @@ -805,9 +807,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
 
             firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load
 
 -            if (VectorContainsNonAsciiChar(firstVector))
 +            // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element
 +            // has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
 +            // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored.
 +
 +            currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 +
 +            if ((currentMask & NonAsciiDataSeenMask) != 0)
             {
 -                goto FoundNonAsciiDataInFirstVector;
 +                goto FoundNonAsciiDataInCurrentMask;
             }
 
             // If we have less than 32 bytes to process, just go straight to the final unaligned
 @@ -880,9 +888,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // Remember, at this point pBuffer is still aligned.
 
             firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer);
 -            if (VectorContainsNonAsciiChar(firstVector))
 +            if (Sse41.IsSupported)
             {
 -                goto FoundNonAsciiDataInFirstVector;
 +                // If a non-ASCII bit is set in any WORD of this vector, we have seen non-ASCII data.
 +                // Jump to the non-ASCII handler, which recomputes the mask for this vector.
 +                if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
 +                {
 +                    goto FoundNonAsciiDataInFirstVector;
 +                }
 +            }
 +            else
 +            {
 +                // See comment earlier in the method for an explanation of how the below logic works.
 +                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 +                if ((currentMask & NonAsciiDataSeenMask) != 0)
 +                {
 +                    goto FoundNonAsciiDataInCurrentMask;
 +                }
             }
 
         IncrementCurrentOffsetBeforeFinalUnalignedVectorRead:
 @@ -899,9 +921,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
                 pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes);
                 firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load
 
 -                if (VectorContainsNonAsciiChar(firstVector))
 +                if (Sse41.IsSupported)
 +                {
 +                    // If a non-ASCII bit is set in any WORD of this vector, we have seen non-ASCII data.
 +                    // Jump to the non-ASCII handler, which recomputes the mask for this vector.
 +                    if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
 +                    {
 +                        goto FoundNonAsciiDataInFirstVector;
 +                    }
 +                }
 +                else
                 {
 -                     goto FoundNonAsciiDataInFirstVector;
 +                    // See comment earlier in the method for an explanation of how the below logic works.
 +                    currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 +                    if ((currentMask & NonAsciiDataSeenMask) != 0)
 +                    {
 +                        goto FoundNonAsciiDataInCurrentMask;
 +                    }
                 }
 
                 pBuffer += SizeOfVector128InChars;
 @@ -917,23 +953,37 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // We don't know if the first or the second vector contains non-ASCII data. Check the first
             // vector, and if that's all-ASCII then the second vector must be the culprit. Either way
             // we'll make sure the first vector local is the one that contains the non-ASCII data.
 -
 -            if (VectorContainsNonAsciiChar(firstVector))
 +            // See comment earlier in the method for an explanation of how the below logic works.
 +            if (Sse41.IsSupported)
             {
 -                goto FoundNonAsciiDataInFirstVector;
 +                if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
 +                {
 +                    goto FoundNonAsciiDataInFirstVector;
 +                }
 +            }
 +            else
 +            {
 +                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 +                if ((currentMask & NonAsciiDataSeenMask) != 0)
 +                {
 +                    goto FoundNonAsciiDataInCurrentMask;
 +                }
             }
 
 -            // Wasn't the first vector; must be the second.
 +                // Wasn't the first vector; must be the second.
 
 -            pBuffer += SizeOfVector128InChars;
 +                pBuffer += SizeOfVector128InChars;
             firstVector = secondVector;
 
         FoundNonAsciiDataInFirstVector:
 
 -            // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element
 -            // has value >= 0x0800 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
 -            // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored..
 +            // See comment earlier in the method for an explanation of how the below logic works.
             currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 +
 +        FoundNonAsciiDataInCurrentMask:
 +
 +            // See comment earlier in the method accounting for the 0x8000 and 0x0080 bits set after the WORD-sized operations.
 +
             currentMask &= NonAsciiDataSeenMask;
 
             // Now, the mask contains - from the LSB - a 0b00 pair for each ASCII char we saw, and a 0b10 pair for each non-ASCII char.
	diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
	index 163d62208c6..864bcb58b2e 100644
	--- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
	+++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
	@@ -794,6 +794,8 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
	// This method is written such that control generally flows top-to-bottom, avoiding
	// jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII
	// data, we jump out of the hot paths to targets at the end of the method.
	+
	+ Vector128<ushort> asciiMaskForTestZ = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
	Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
	const uint NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether 'currentMask' contains non-ASCII data

	@@ -805,9 +807,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin

	firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load

	- if (VectorContainsNonAsciiChar(firstVector))
	+ // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element
	+ // has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
	+ // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored.
	+
	+ currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
	+
	+ if ((currentMask & NonAsciiDataSeenMask) != 0)
	{
	- goto FoundNonAsciiDataInFirstVector;
	+ goto FoundNonAsciiDataInCurrentMask;
	}

	// If we have less than 32 bytes to process, just go straight to the final unaligned
	@@ -880,9 +888,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
	// Remember, at this point pBuffer is still aligned.

	firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer);
	- if (VectorContainsNonAsciiChar(firstVector))
	+ if (Sse41.IsSupported)
	{
	- goto FoundNonAsciiDataInFirstVector;
	+ // If a non-ASCII bit is set in any WORD of this vector, we have seen non-ASCII data.
	+ // Jump to the non-ASCII handler, which recomputes the mask for this vector.
	+ if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
	+ {
	+ goto FoundNonAsciiDataInFirstVector;
	+ }
	+ }
	+ else
	+ {
	+ // See comment earlier in the method for an explanation of how the below logic works.
	+ currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
	+ if ((currentMask & NonAsciiDataSeenMask) != 0)
	+ {
	+ goto FoundNonAsciiDataInCurrentMask;
	+ }
	}

	IncrementCurrentOffsetBeforeFinalUnalignedVectorRead:
	@@ -899,9 +921,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
	pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes);
	firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load

	- if (VectorContainsNonAsciiChar(firstVector))
	+ if (Sse41.IsSupported)
	+ {
	+ // If a non-ASCII bit is set in any WORD of this vector, we have seen non-ASCII data.
	+ // Jump to the non-ASCII handler, which recomputes the mask for this vector.
	+ if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
	+ {
	+ goto FoundNonAsciiDataInFirstVector;
	+ }
	+ }
	+ else
	{
	- goto FoundNonAsciiDataInFirstVector;
	+ // See comment earlier in the method for an explanation of how the below logic works.
	+ currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
	+ if ((currentMask & NonAsciiDataSeenMask) != 0)
	+ {
	+ goto FoundNonAsciiDataInCurrentMask;
	+ }
	}

	pBuffer += SizeOfVector128InChars;
	@@ -917,23 +953,37 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
	// We don't know if the first or the second vector contains non-ASCII data. Check the first
	// vector, and if that's all-ASCII then the second vector must be the culprit. Either way
	// we'll make sure the first vector local is the one that contains the non-ASCII data.
	-
	- if (VectorContainsNonAsciiChar(firstVector))
	+ // See comment earlier in the method for an explanation of how the below logic works.
	+ if (Sse41.IsSupported)
	{
	- goto FoundNonAsciiDataInFirstVector;
	+ if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
	+ {
	+ goto FoundNonAsciiDataInFirstVector;
	+ }
	+ }
	+ else
	+ {
	+ currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
	+ if ((currentMask & NonAsciiDataSeenMask) != 0)
	+ {
	+ goto FoundNonAsciiDataInCurrentMask;
	+ }
	}

	- // Wasn't the first vector; must be the second.
	+ // Wasn't the first vector; must be the second.

	- pBuffer += SizeOfVector128InChars;
	+ pBuffer += SizeOfVector128InChars;
	firstVector = secondVector;

	FoundNonAsciiDataInFirstVector:

	- // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element
	- // has value >= 0x0800 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
	- // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored..
	+ // See comment earlier in the method for an explanation of how the below logic works.
	currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
	+
	+ FoundNonAsciiDataInCurrentMask:
	+
	+ // See comment earlier in the method accounting for the 0x8000 and 0x0080 bits set after the WORD-sized operations.
	+
	currentMask &= NonAsciiDataSeenMask;

	// Now, the mask contains - from the LSB - a 0b00 pair for each ASCII char we saw, and a 0b10 pair for each non-ASCII char.
No results found