Last active
January 15, 2020 03:10
-
-
Save commonsensesoftware/aa59ac440650437b6baa607b21b28522 to your computer and use it in GitHub Desktop.
Sample Base64Url Implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Microsoft.Example | |
{ | |
using System; | |
using System.Buffers; | |
using System.Diagnostics.CodeAnalysis; | |
using System.Runtime.CompilerServices; | |
using Microsoft.Example.Text; | |
using static System.Text.Encoding; | |
/// <summary> | |
/// Represents the Base 64 Encoding with URL and Filename Safe Alphabet | |
/// <seealso href="https://tools.ietf.org/html/rfc4648#section-4"/>. | |
/// </summary> | |
public static class Base64Url | |
{ | |
// REF: based on value found in: https://github.com/dotnet/aspnetcore/blob/master/src/Shared/WebEncoders/WebEncoders.cs | |
private const int RentSize = 128; | |
private const byte Pad = (byte)'='; | |
/// <summary> | |
/// Encodes the specified binary as a Base 64 URL. | |
/// </summary> | |
/// <param name="binary">The binary to encode.</param> | |
/// <returns>The encoded text in the Base 64 URL format.</returns> | |
public static string Encode(ReadOnlySpan<byte> binary) | |
{ | |
var bufferSize = Base64UrlEncoding.GetMaxEncodedToUtf8Length(binary.Length); | |
var rented = default(byte[]); | |
Span<byte> buffer = bufferSize <= RentSize ? stackalloc byte[bufferSize] : Rent(bufferSize, ref rented); | |
Base64UrlEncoding.EncodeToUtf8(binary, buffer, out var consumed, out var written); | |
var length = LengthWithoutPadding(buffer, written); | |
var base64Url = ASCII.GetString(buffer.Slice(0, length)); | |
if (rented != null) | |
{ | |
ArrayPool<byte>.Shared.Return(rented); | |
} | |
return base64Url; | |
} | |
/// <summary> | |
/// Encodes the specified binary as a Base 64 URL. | |
/// </summary> | |
/// <param name="binary">The binary to encode.</param> | |
/// <returns>The encoded text in the Base 64 URL format.</returns> | |
public static string Encode(byte[] binary) => Encode(binary.AsSpan()); | |
/// <summary> | |
/// Encodes the specified text as a Base 64 URL. | |
/// </summary> | |
/// <param name="text">The text to encode.</param> | |
/// <returns>The encoded text in the Base 64 URL format.</returns> | |
public static string Encode(string text) | |
{ | |
var bufferSize = UTF8.GetMaxByteCount(text.Length); | |
var rented = default(byte[]); | |
Span<byte> buffer = bufferSize <= RentSize ? stackalloc byte[bufferSize] : Rent(bufferSize, ref rented); | |
var length = UTF8.GetBytes(text.AsSpan(), buffer); | |
var base64Url = Encode(buffer.Slice(0, length)); | |
if (rented != null) | |
{ | |
ArrayPool<byte>.Shared.Return(rented); | |
} | |
return base64Url; | |
} | |
/// <summary> | |
/// Decodes the specified Base 64 URL as plain text. | |
/// </summary> | |
/// <param name="base64Url">The Base 64 URL encoded value to decode.</param> | |
/// <returns>The decoded, plain text.</returns> | |
public static string Decode(string base64Url) | |
{ | |
var buffer = base64Url.AsSpan(); | |
var length = buffer.Length; | |
var padding = ComputeRequiredPadding(length); | |
var inputSize = length + padding; | |
var outputSize = Base64UrlEncoding.GetMaxDecodedFromUtf8Length(inputSize); | |
var rentedInput = default(byte[]); | |
var rentedOutput = default(byte[]); | |
Span<byte> input = inputSize <= RentSize ? stackalloc byte[inputSize] : Rent(inputSize, ref rentedInput); | |
Span<byte> output = outputSize <= RentSize ? stackalloc byte[outputSize] : Rent(outputSize, ref rentedOutput); | |
ASCII.GetBytes(buffer, input); | |
AppendPadding(input, padding); | |
Base64UrlEncoding.DecodeFromUtf8(input, output, out var consumed, out var written); | |
if (rentedInput != null) | |
{ | |
ArrayPool<byte>.Shared.Return(rentedInput); | |
} | |
var text = UTF8.GetString(output.Slice(0, written)); | |
if (rentedOutput != null) | |
{ | |
ArrayPool<byte>.Shared.Return(rentedOutput); | |
} | |
return text; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static Span<byte> Rent(int size, [AllowNull] ref byte[] rented) => | |
new Span<byte>(rented = ArrayPool<byte>.Shared.Rent(size), 0, size); | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static int LengthWithoutPadding(Span<byte> output, int size) | |
{ | |
if (output[^2] == Pad) | |
{ | |
return size - 2; | |
} | |
if (output[^1] == Pad) | |
{ | |
return size - 1; | |
} | |
return size; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static int ComputeRequiredPadding(int length) => 4 - (length % 4); | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static void AppendPadding(Span<byte> input, int amount) => | |
input.Slice(input.Length - amount, amount).Fill(Pad); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Microsoft.Example.Text | |
{ | |
using System; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
internal static partial class Base64UrlEncoding | |
{ | |
private static TVector ReadVector<TVector>(ReadOnlySpan<sbyte> data) | |
{ | |
ref sbyte tmp = ref MemoryMarshal.GetReference(data); | |
return Unsafe.As<sbyte, TVector>(ref tmp); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Microsoft.Example.Text | |
{ | |
using System; | |
using System.Buffers; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
internal static partial class Base64UrlEncoding | |
{ | |
public static unsafe OperationStatus DecodeFromUtf8(ReadOnlySpan<byte> utf8, Span<byte> bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) | |
{ | |
if (utf8.IsEmpty) | |
{ | |
bytesConsumed = 0; | |
bytesWritten = 0; | |
return OperationStatus.Done; | |
} | |
fixed (byte* srcBytes = &MemoryMarshal.GetReference(utf8)) | |
fixed (byte* destBytes = &MemoryMarshal.GetReference(bytes)) | |
{ | |
int srcLength = utf8.Length & ~0x3; // only decode input up to the closest multiple of 4. | |
int destLength = bytes.Length; | |
int maxSrcLength = srcLength; | |
int decodedLength = GetMaxDecodedFromUtf8Length(srcLength); | |
// max. 2 padding chars | |
if (destLength < decodedLength - 2) | |
{ | |
// For overflow see comment below | |
maxSrcLength = destLength / 3 * 4; | |
} | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
byte* srcEnd = srcBytes + (uint)srcLength; | |
byte* srcMax = srcBytes + (uint)maxSrcLength; | |
if (maxSrcLength >= 24) | |
{ | |
byte* end = srcMax - 45; | |
if (Avx2.IsSupported && (end >= src)) | |
{ | |
Avx2Decode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); | |
if (src == srcEnd) | |
{ | |
goto DoneExit; | |
} | |
} | |
end = srcMax - 24; | |
if (Ssse3.IsSupported && (end >= src)) | |
{ | |
Ssse3Decode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); | |
if (src == srcEnd) | |
{ | |
goto DoneExit; | |
} | |
} | |
} | |
// Last bytes could have padding characters, so process them separately and treat them as valid only if isFinalBlock is true | |
// if isFinalBlock is false, padding characters are considered invalid | |
int skipLastChunk = isFinalBlock ? 4 : 0; | |
if (destLength >= decodedLength) | |
{ | |
maxSrcLength = srcLength - skipLastChunk; | |
} | |
else | |
{ | |
// This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 1610612733) | |
// Therefore, (destLength / 3) * 4 will always be less than 2147483641 | |
maxSrcLength = (destLength / 3) * 4; | |
} | |
ref sbyte decodingMap = ref MemoryMarshal.GetReference(s_decodingMap); | |
srcMax = srcBytes + (uint)maxSrcLength; | |
while (src < srcMax) | |
{ | |
int result = Decode(src, ref decodingMap); | |
if (result < 0) | |
{ | |
goto InvalidDataExit; | |
} | |
WriteThreeLowOrderBytes(dest, result); | |
src += 4; | |
dest += 3; | |
} | |
if (maxSrcLength != srcLength - skipLastChunk) | |
{ | |
goto DestinationTooSmallExit; | |
} | |
// If input is less than 4 bytes, srcLength == sourceIndex == 0 | |
// If input is not a multiple of 4, sourceIndex == srcLength != 0 | |
if (src == srcEnd) | |
{ | |
if (isFinalBlock) | |
{ | |
goto InvalidDataExit; | |
} | |
goto NeedMoreDataExit; | |
} | |
// if isFinalBlock is false, we will never reach this point | |
// Handle last four bytes. There are 0, 1, 2 padding chars. | |
uint t0 = srcEnd[-4]; | |
uint t1 = srcEnd[-3]; | |
uint t2 = srcEnd[-2]; | |
uint t3 = srcEnd[-1]; | |
int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); | |
int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); | |
i0 <<= 18; | |
i1 <<= 12; | |
i0 |= i1; | |
byte* destMax = destBytes + (uint)destLength; | |
if (t3 != EncodingPad) | |
{ | |
int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); | |
int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); | |
i2 <<= 6; | |
i0 |= i3; | |
i0 |= i2; | |
if (i0 < 0) | |
{ | |
goto InvalidDataExit; | |
} | |
if (dest + 3 > destMax) | |
{ | |
goto DestinationTooSmallExit; | |
} | |
WriteThreeLowOrderBytes(dest, i0); | |
dest += 3; | |
} | |
else if (t2 != EncodingPad) | |
{ | |
int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); | |
i2 <<= 6; | |
i0 |= i2; | |
if (i0 < 0) | |
{ | |
goto InvalidDataExit; | |
} | |
if (dest + 2 > destMax) | |
{ | |
goto DestinationTooSmallExit; | |
} | |
dest[0] = (byte)(i0 >> 16); | |
dest[1] = (byte)(i0 >> 8); | |
dest += 2; | |
} | |
else | |
{ | |
if (i0 < 0) | |
{ | |
goto InvalidDataExit; | |
} | |
if (dest + 1 > destMax) | |
{ | |
goto DestinationTooSmallExit; | |
} | |
dest[0] = (byte)(i0 >> 16); | |
dest += 1; | |
} | |
src += 4; | |
if (srcLength != utf8.Length) | |
{ | |
goto InvalidDataExit; | |
} | |
DoneExit: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.Done; | |
DestinationTooSmallExit: | |
if (srcLength != utf8.Length && isFinalBlock) | |
{ | |
goto InvalidDataExit; // if input is not a multiple of 4, and there is no more data, return invalid data instead | |
} | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.DestinationTooSmall; | |
NeedMoreDataExit: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.NeedMoreData; | |
InvalidDataExit: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.InvalidData; | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static int GetMaxDecodedFromUtf8Length(int length) | |
{ | |
if (length < 0) | |
{ | |
throw new ArgumentOutOfRangeException(nameof(length)); | |
} | |
return (length >> 2) * 3; | |
} | |
public static unsafe OperationStatus DecodeFromUtf8InPlace(Span<byte> buffer, out int bytesWritten) | |
{ | |
if (buffer.IsEmpty) | |
{ | |
bytesWritten = 0; | |
return OperationStatus.Done; | |
} | |
fixed (byte* bufferBytes = &MemoryMarshal.GetReference(buffer)) | |
{ | |
int bufferLength = buffer.Length; | |
uint sourceIndex = 0; | |
uint destIndex = 0; | |
// only decode input if it is a multiple of 4 | |
if (bufferLength != ((bufferLength >> 2) * 4)) | |
{ | |
goto InvalidExit; | |
} | |
if (bufferLength == 0) | |
{ | |
goto DoneExit; | |
} | |
ref sbyte decodingMap = ref MemoryMarshal.GetReference(s_decodingMap); | |
while (sourceIndex < bufferLength - 4) | |
{ | |
int result = Decode(bufferBytes + sourceIndex, ref decodingMap); | |
if (result < 0) | |
{ | |
goto InvalidExit; | |
} | |
WriteThreeLowOrderBytes(bufferBytes + destIndex, result); | |
destIndex += 3; | |
sourceIndex += 4; | |
} | |
uint t0 = bufferBytes[bufferLength - 4]; | |
uint t1 = bufferBytes[bufferLength - 3]; | |
uint t2 = bufferBytes[bufferLength - 2]; | |
uint t3 = bufferBytes[bufferLength - 1]; | |
int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); | |
int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); | |
i0 <<= 18; | |
i1 <<= 12; | |
i0 |= i1; | |
if (t3 != EncodingPad) | |
{ | |
int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); | |
int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); | |
i2 <<= 6; | |
i0 |= i3; | |
i0 |= i2; | |
if (i0 < 0) | |
{ | |
goto InvalidExit; | |
} | |
WriteThreeLowOrderBytes(bufferBytes + destIndex, i0); | |
destIndex += 3; | |
} | |
else if (t2 != EncodingPad) | |
{ | |
int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); | |
i2 <<= 6; | |
i0 |= i2; | |
if (i0 < 0) | |
{ | |
goto InvalidExit; | |
} | |
bufferBytes[destIndex] = (byte)(i0 >> 16); | |
bufferBytes[destIndex + 1] = (byte)(i0 >> 8); | |
destIndex += 2; | |
} | |
else | |
{ | |
if (i0 < 0) | |
{ | |
goto InvalidExit; | |
} | |
bufferBytes[destIndex] = (byte)(i0 >> 16); | |
destIndex += 1; | |
} | |
DoneExit: | |
bytesWritten = (int)destIndex; | |
return OperationStatus.Done; | |
InvalidExit: | |
bytesWritten = (int)destIndex; | |
return OperationStatus.InvalidData; | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) | |
{ | |
// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, | |
// but make sure that we quit before seeing any == markers at the end of the | |
// string. Also, because we write 8 zeroes at the end of the output, ensure | |
// that there are at least 11 valid bytes of input data remaining to close the | |
// gap. 32 + 2 + 11 = 45 bytes. | |
// See SSSE3-version below for an explanation of how the code works. | |
// The JIT won't hoist these "constants", so help it | |
Vector256<sbyte> lutHi = ReadVector<Vector256<sbyte>>(s_avxDecodeLutHi); | |
Vector256<sbyte> lutLo = ReadVector<Vector256<sbyte>>(s_avxDecodeLutLo); | |
Vector256<sbyte> lutShift = ReadVector<Vector256<sbyte>>(s_avxDecodeLutShift); | |
Vector256<sbyte> mask2F = Vector256.Create((sbyte)'/'); | |
Vector256<sbyte> mergeConstant0 = Vector256.Create(0x01400140).AsSByte(); | |
Vector256<short> mergeConstant1 = Vector256.Create(0x00011000).AsInt16(); | |
Vector256<sbyte> packBytesInLaneMask = ReadVector<Vector256<sbyte>>(s_avxDecodePackBytesInLaneMask); | |
Vector256<int> packLanesControl = ReadVector<Vector256<sbyte>>(s_avxDecodePackLanesControl).AsInt32(); | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
//while (remaining >= 45) | |
do | |
{ | |
Vector256<sbyte> str = Avx.LoadVector256(src).AsSByte(); | |
Vector256<sbyte> hiNibbles = Avx2.And(Avx2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F); | |
Vector256<sbyte> loNibbles = Avx2.And(str, mask2F); | |
Vector256<sbyte> hi = Avx2.Shuffle(lutHi, hiNibbles); | |
Vector256<sbyte> lo = Avx2.Shuffle(lutLo, loNibbles); | |
if (!Avx.TestZ(lo, hi)) | |
{ | |
break; | |
} | |
Vector256<sbyte> eq2F = Avx2.CompareEqual(str, mask2F); | |
Vector256<sbyte> shift = Avx2.Shuffle(lutShift, Avx2.Add(eq2F, hiNibbles)); | |
str = Avx2.Add(str, shift); | |
// in, lower lane, bits, upper case are most significant bits, lower case are least significant bits: | |
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA | |
Vector256<short> merge_ab_and_bc = Avx2.MultiplyAddAdjacent(str.AsByte(), mergeConstant0); | |
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK | |
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH | |
// 0000eeee FFffffff 0000DDDD DDddEEEE | |
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB | |
Vector256<int> output = Avx2.MultiplyAddAdjacent(merge_ab_and_bc, mergeConstant1); | |
// 00000000 JJJJJJjj KKKKkkkk LLllllll | |
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii | |
// 00000000 DDDDDDdd EEEEeeee FFffffff | |
// 00000000 AAAAAAaa BBBBbbbb CCcccccc | |
// Pack bytes together in each lane: | |
output = Avx2.Shuffle(output.AsSByte(), packBytesInLaneMask).AsInt32(); | |
// 00000000 00000000 00000000 00000000 | |
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii | |
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee | |
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa | |
// Pack lanes | |
str = Avx2.PermuteVar8x32(output, packLanesControl).AsSByte(); | |
Avx.Store(dest, str.AsByte()); | |
src += 32; | |
dest += 24; | |
} | |
while (src <= srcEnd); | |
srcBytes = src; | |
destBytes = dest; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) | |
{ | |
// If we have SSSE3 support, pick off 16 bytes at a time for as long as we can, | |
// but make sure that we quit before seeing any == markers at the end of the | |
// string. Also, because we write four zeroes at the end of the output, ensure | |
// that there are at least 6 valid bytes of input data remaining to close the | |
// gap. 16 + 2 + 6 = 24 bytes. | |
// The input consists of six character sets in the Base64 alphabet, | |
// which we need to map back to the 6-bit values they represent. | |
// There are three ranges, two singles, and then there's the rest. | |
// | |
// # From To Add Characters | |
// 1 [43] [62] +19 + | |
// 2 [47] [63] +16 / | |
// 3 [48..57] [52..61] +4 0..9 | |
// 4 [65..90] [0..25] -65 A..Z | |
// 5 [97..122] [26..51] -71 a..z | |
// (6) Everything else => invalid input | |
// We will use LUTS for character validation & offset computation | |
// Remember that 0x2X and 0x0X are the same index for _mm_shuffle_epi8, | |
// this allows to mask with 0x2F instead of 0x0F and thus save one constant declaration (register and/or memory access) | |
// For offsets: | |
// Perfect hash for lut = ((src>>4)&0x2F)+((src==0x2F)?0xFF:0x00) | |
// 0000 = garbage | |
// 0001 = / | |
// 0010 = + | |
// 0011 = 0-9 | |
// 0100 = A-Z | |
// 0101 = A-Z | |
// 0110 = a-z | |
// 0111 = a-z | |
// 1000 >= garbage | |
// For validation, here's the table. | |
// A character is valid if and only if the AND of the 2 lookups equals 0: | |
// hi \ lo 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111 | |
// LUT 0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A | |
// 0000 0X10 char NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI | |
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 0001 0x10 char DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US | |
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 0010 0x01 char ! " # $ % & ' ( ) * + , - . / | |
// andlut 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00 | |
// 0011 0x02 char 0 1 2 3 4 5 6 7 8 9 : ; < = > ? | |
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02 | |
// 0100 0x04 char @ A B C D E F G H I J K L M N 0 | |
// andlut 0x04 0x00 0x00 0x00 0X00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | |
// 0101 0x08 char P Q R S T U V W X Y Z [ \ ] ^ _ | |
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08 | |
// 0110 0x04 char ` a b c d e f g h i j k l m n o | |
// andlut 0x04 0x00 0x00 0x00 0X00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 | |
// 0111 0X08 char p q r s t u v w x y z { | } ~ | |
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08 | |
// 1000 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1001 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1010 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1011 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1100 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1101 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1110 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 | |
// The JIT won't hoist these "constants", so help it | |
Vector128<sbyte> lutHi = ReadVector<Vector128<sbyte>>(s_sseDecodeLutHi); | |
Vector128<sbyte> lutLo = ReadVector<Vector128<sbyte>>(s_sseDecodeLutLo); | |
Vector128<sbyte> lutShift = ReadVector<Vector128<sbyte>>(s_sseDecodeLutShift); | |
Vector128<sbyte> mask2F = Vector128.Create((sbyte)'/'); | |
Vector128<sbyte> mergeConstant0 = Vector128.Create(0x01400140).AsSByte(); | |
Vector128<short> mergeConstant1 = Vector128.Create(0x00011000).AsInt16(); | |
Vector128<sbyte> packBytesMask = ReadVector<Vector128<sbyte>>(s_sseDecodePackBytesMask); | |
Vector128<sbyte> zero = Vector128<sbyte>.Zero; | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
//while (remaining >= 24) | |
do | |
{ | |
Vector128<sbyte> str = Sse2.LoadVector128(src).AsSByte(); | |
// lookup | |
Vector128<sbyte> hiNibbles = Sse2.And(Sse2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F); | |
Vector128<sbyte> loNibbles = Sse2.And(str, mask2F); | |
Vector128<sbyte> hi = Ssse3.Shuffle(lutHi, hiNibbles); | |
Vector128<sbyte> lo = Ssse3.Shuffle(lutLo, loNibbles); | |
// Check for invalid input: if any "and" values from lo and hi are not zero, | |
// fall back on bytewise code to do error checking and reporting: | |
if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.And(lo, hi), zero)) != 0) | |
{ | |
break; | |
} | |
Vector128<sbyte> eq2F = Sse2.CompareEqual(str, mask2F); | |
Vector128<sbyte> shift = Ssse3.Shuffle(lutShift, Sse2.Add(eq2F, hiNibbles)); | |
// Now simply add the delta values to the input: | |
str = Sse2.Add(str, shift); | |
// in, bits, upper case are most significant bits, lower case are least significant bits | |
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA | |
Vector128<short> merge_ab_and_bc = Ssse3.MultiplyAddAdjacent(str.AsByte(), mergeConstant0); | |
// 0000kkkk LLllllll 0000JJJJ JJjjKKKK | |
// 0000hhhh IIiiiiii 0000GGGG GGggHHHH | |
// 0000eeee FFffffff 0000DDDD DDddEEEE | |
// 0000bbbb CCcccccc 0000AAAA AAaaBBBB | |
Vector128<int> output = Sse2.MultiplyAddAdjacent(merge_ab_and_bc, mergeConstant1); | |
// 00000000 JJJJJJjj KKKKkkkk LLllllll | |
// 00000000 GGGGGGgg HHHHhhhh IIiiiiii | |
// 00000000 DDDDDDdd EEEEeeee FFffffff | |
// 00000000 AAAAAAaa BBBBbbbb CCcccccc | |
// Pack bytes together: | |
str = Ssse3.Shuffle(output.AsSByte(), packBytesMask); | |
// 00000000 00000000 00000000 00000000 | |
// LLllllll KKKKkkkk JJJJJJjj IIiiiiii | |
// HHHHhhhh GGGGGGgg FFffffff EEEEeeee | |
// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa | |
Sse2.Store(dest, str.AsByte()); | |
src += 16; | |
dest += 12; | |
} | |
while (src <= srcEnd); | |
srcBytes = src; | |
destBytes = dest; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe int Decode(byte* encodedBytes, ref sbyte decodingMap) | |
{ | |
uint t0 = encodedBytes[0]; | |
uint t1 = encodedBytes[1]; | |
uint t2 = encodedBytes[2]; | |
uint t3 = encodedBytes[3]; | |
int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); | |
int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); | |
int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); | |
int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); | |
i0 <<= 18; | |
i1 <<= 12; | |
i2 <<= 6; | |
i0 |= i3; | |
i1 |= i2; | |
i0 |= i1; | |
return i0; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) | |
{ | |
destination[0] = (byte)(value >> 16); | |
destination[1] = (byte)(value >> 8); | |
destination[2] = (byte)value; | |
} | |
// Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests) | |
private static ReadOnlySpan<sbyte> s_decodingMap => new sbyte[] { | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, //62 is placed at index 45 (for -) | |
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9), 64 at index 61 (for =) | |
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | |
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, //0-25 are placed at index 65-90 (for A-Z), 63 at index 95 (for _) | |
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index 97-122 (for a-z) | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are invalid and cannot be decoded | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map with 255, which indicates invalid input | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
}; | |
private static ReadOnlySpan<sbyte> s_sseDecodePackBytesMask => new sbyte[] { | |
2, 1, 0, 6, | |
5, 4, 10, 9, | |
8, 14, 13, 12, | |
-1, -1, -1, -1 | |
}; | |
private static ReadOnlySpan<sbyte> s_sseDecodeLutLo => new sbyte[] { | |
0x15, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x13, 0x1A, | |
0x1B, 0x1B, 0x1B, 0x1A | |
}; | |
private static ReadOnlySpan<sbyte> s_sseDecodeLutHi => new sbyte[] { | |
0x10, 0x10, 0x01, 0x02, | |
0x04, 0x08, 0x04, 0x08, | |
0x10, 0x10, 0x10, 0x10, | |
0x10, 0x10, 0x10, 0x10 | |
}; | |
private static ReadOnlySpan<sbyte> s_sseDecodeLutShift => new sbyte[] { | |
0, 16, 19, 4, | |
-65, -65, -71, -71, | |
0, 0, 0, 0, | |
0, 0, 0, 0 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxDecodePackBytesInLaneMask => new sbyte[] { | |
2, 1, 0, 6, | |
5, 4, 10, 9, | |
8, 14, 13, 12, | |
-1, -1, -1, -1, | |
2, 1, 0, 6, | |
5, 4, 10, 9, | |
8, 14, 13, 12, | |
-1, -1, -1, -1 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxDecodePackLanesControl => new sbyte[] { | |
0, 0, 0, 0, | |
1, 0, 0, 0, | |
2, 0, 0, 0, | |
4, 0, 0, 0, | |
5, 0, 0, 0, | |
6, 0, 0, 0, | |
-1, -1, -1, -1, | |
-1, -1, -1, -1 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxDecodeLutLo => new sbyte[] { | |
0x15, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x13, 0x1A, | |
0x1B, 0x1B, 0x1B, 0x1A, | |
0x15, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x11, 0x11, | |
0x11, 0x11, 0x13, 0x1A, | |
0x1B, 0x1B, 0x1B, 0x1A | |
}; | |
private static ReadOnlySpan<sbyte> s_avxDecodeLutHi => new sbyte[] { | |
0x10, 0x10, 0x01, 0x02, | |
0x04, 0x08, 0x04, 0x08, | |
0x10, 0x10, 0x10, 0x10, | |
0x10, 0x10, 0x10, 0x10, | |
0x10, 0x10, 0x01, 0x02, | |
0x04, 0x08, 0x04, 0x08, | |
0x10, 0x10, 0x10, 0x10, | |
0x10, 0x10, 0x10, 0x10 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxDecodeLutShift => new sbyte[] { | |
0, 16, 19, 4, | |
-65, -65, -71, -71, | |
0, 0, 0, 0, | |
0, 0, 0, 0, | |
0, 16, 19, 4, | |
-65, -65, -71, -71, | |
0, 0, 0, 0, | |
0, 0, 0, 0 | |
}; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Microsoft.Example.Text | |
{ | |
using System; | |
using System.Buffers; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
internal static partial class Base64UrlEncoding | |
{ | |
public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> utf8, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) | |
{ | |
if (bytes.IsEmpty) | |
{ | |
bytesConsumed = 0; | |
bytesWritten = 0; | |
return OperationStatus.Done; | |
} | |
fixed (byte* srcBytes = &MemoryMarshal.GetReference(bytes)) | |
fixed (byte* destBytes = &MemoryMarshal.GetReference(utf8)) | |
{ | |
int srcLength = bytes.Length; | |
int destLength = utf8.Length; | |
int maxSrcLength; | |
if (srcLength <= MaximumEncodeLength && destLength >= GetMaxEncodedToUtf8Length(srcLength)) | |
{ | |
maxSrcLength = srcLength; | |
} | |
else | |
{ | |
maxSrcLength = (destLength >> 2) * 3; | |
} | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
byte* srcEnd = srcBytes + (uint)srcLength; | |
byte* srcMax = srcBytes + (uint)maxSrcLength; | |
if (maxSrcLength >= 16) | |
{ | |
byte* end = srcMax - 32; | |
if (Avx2.IsSupported && (end >= src)) | |
{ | |
Avx2Encode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); | |
if (src == srcEnd) | |
{ | |
goto DoneExit; | |
} | |
} | |
end = srcMax - 16; | |
if (Ssse3.IsSupported && (end >= src)) | |
{ | |
Ssse3Encode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); | |
if (src == srcEnd) | |
{ | |
goto DoneExit; | |
} | |
} | |
} | |
ref byte encodingMap = ref MemoryMarshal.GetReference(s_encodingMap); | |
uint result = 0; | |
srcMax -= 2; | |
while (src < srcMax) | |
{ | |
result = Encode(src, ref encodingMap); | |
Unsafe.WriteUnaligned(dest, result); | |
src += 3; | |
dest += 4; | |
} | |
if (srcMax + 2 != srcEnd) | |
{ | |
goto DestinationTooSmallExit; | |
} | |
if (!isFinalBlock) | |
{ | |
goto NeedMoreData; | |
} | |
if (src + 1 == srcEnd) | |
{ | |
result = EncodeAndPadTwo(src, ref encodingMap); | |
Unsafe.WriteUnaligned(dest, result); | |
src += 1; | |
dest += 4; | |
} | |
else if (src + 2 == srcEnd) | |
{ | |
result = EncodeAndPadOne(src, ref encodingMap); | |
Unsafe.WriteUnaligned(dest, result); | |
src += 2; | |
dest += 4; | |
} | |
DoneExit: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.Done; | |
DestinationTooSmallExit: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.DestinationTooSmall; | |
NeedMoreData: | |
bytesConsumed = (int)(src - srcBytes); | |
bytesWritten = (int)(dest - destBytes); | |
return OperationStatus.NeedMoreData; | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static int GetMaxEncodedToUtf8Length(int length) | |
{ | |
if ((uint)length > MaximumEncodeLength) | |
{ | |
throw new ArgumentOutOfRangeException(nameof(length)); | |
} | |
return ((length + 2) / 3) * 4; | |
} | |
public static unsafe OperationStatus EncodeToUtf8InPlace(Span<byte> buffer, int dataLength, out int bytesWritten) | |
{ | |
if (buffer.IsEmpty) | |
{ | |
bytesWritten = 0; | |
return OperationStatus.Done; | |
} | |
fixed (byte* bufferBytes = &MemoryMarshal.GetReference(buffer)) | |
{ | |
int encodedLength = GetMaxEncodedToUtf8Length(dataLength); | |
if (buffer.Length < encodedLength) | |
{ | |
goto FalseExit; | |
} | |
int leftover = dataLength - ((dataLength / 3) * 3); // how many bytes after packs of 3 | |
uint destinationIndex = (uint)(encodedLength - 4); | |
uint sourceIndex = (uint)(dataLength - leftover); | |
uint result = 0; | |
ref byte encodingMap = ref MemoryMarshal.GetReference(s_encodingMap); | |
// encode last pack to avoid conditional in the main loop | |
if (leftover != 0) | |
{ | |
if (leftover == 1) | |
{ | |
result = EncodeAndPadTwo(bufferBytes + sourceIndex, ref encodingMap); | |
} | |
else | |
{ | |
result = EncodeAndPadOne(bufferBytes + sourceIndex, ref encodingMap); | |
} | |
Unsafe.WriteUnaligned(bufferBytes + destinationIndex, result); | |
destinationIndex -= 4; | |
} | |
sourceIndex -= 3; | |
while ((int)sourceIndex >= 0) | |
{ | |
result = Encode(bufferBytes + sourceIndex, ref encodingMap); | |
Unsafe.WriteUnaligned(bufferBytes + destinationIndex, result); | |
destinationIndex -= 4; | |
sourceIndex -= 3; | |
} | |
bytesWritten = encodedLength; | |
return OperationStatus.Done; | |
FalseExit: | |
bytesWritten = 0; | |
return OperationStatus.DestinationTooSmall; | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) | |
{ | |
// If we have AVX2 support, pick off 24 bytes at a time for as long as we can. | |
// But because we read 32 bytes at a time, ensure we have enough room to do a | |
// full 32-byte read without segfaulting. | |
// translation from SSSE3 into AVX2 of procedure | |
// This one works with shifted (4 bytes) input in order to | |
// be able to work efficiently in the 2 128-bit lanes | |
// srcBytes, bytes MSB to LSB: | |
// 0 0 0 0 x w v u t s r q p o n m | |
// l k j i h g f e d c b a 0 0 0 0 | |
// The JIT won't hoist these "constants", so help it | |
Vector256<sbyte> shuffleVec = ReadVector<Vector256<sbyte>>(s_avxEncodeShuffleVec); | |
Vector256<sbyte> maskAC = Vector256.Create(0x0fc0fc00).AsSByte(); | |
Vector256<sbyte> maskBB = Vector256.Create(0x003f03f0).AsSByte(); | |
Vector256<ushort> shiftAC = Vector256.Create(0x04000040).AsUInt16(); | |
Vector256<short> shiftBB = Vector256.Create(0x01000010).AsInt16(); | |
Vector256<byte> const51 = Vector256.Create((byte)51); | |
Vector256<sbyte> const25 = Vector256.Create((sbyte)25); | |
Vector256<sbyte> lut = ReadVector<Vector256<sbyte>>(s_avxEncodeLut); | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
// first load is done at c-0 not to get a segfault | |
Vector256<sbyte> str = Avx.LoadVector256(src).AsSByte(); | |
// shift by 4 bytes, as required by Reshuffle | |
str = Avx2.PermuteVar8x32(str.AsInt32(), ReadVector<Vector256<sbyte>>(s_avxEncodePermuteVec).AsInt32()).AsSByte(); | |
// Next loads are done at src-4, as required by Reshuffle, so shift it once | |
src -= 4; | |
while (true) | |
{ | |
// Reshuffle | |
str = Avx2.Shuffle(str, shuffleVec); | |
// str, bytes MSB to LSB: | |
// w x v w | |
// t u s t | |
// q r p q | |
// n o m n | |
// k l j k | |
// h i g h | |
// e f d e | |
// b c a b | |
Vector256<sbyte> t0 = Avx2.And(str, maskAC); | |
// bits, upper case are most significant bits, lower case are least significant bits. | |
// 0000wwww XX000000 VVVVVV00 00000000 | |
// 0000tttt UU000000 SSSSSS00 00000000 | |
// 0000qqqq RR000000 PPPPPP00 00000000 | |
// 0000nnnn OO000000 MMMMMM00 00000000 | |
// 0000kkkk LL000000 JJJJJJ00 00000000 | |
// 0000hhhh II000000 GGGGGG00 00000000 | |
// 0000eeee FF000000 DDDDDD00 00000000 | |
// 0000bbbb CC000000 AAAAAA00 00000000 | |
Vector256<sbyte> t2 = Avx2.And(str, maskBB); | |
// 00000000 00xxxxxx 000000vv WWWW0000 | |
// 00000000 00uuuuuu 000000ss TTTT0000 | |
// 00000000 00rrrrrr 000000pp QQQQ0000 | |
// 00000000 00oooooo 000000mm NNNN0000 | |
// 00000000 00llllll 000000jj KKKK0000 | |
// 00000000 00iiiiii 000000gg HHHH0000 | |
// 00000000 00ffffff 000000dd EEEE0000 | |
// 00000000 00cccccc 000000aa BBBB0000 | |
Vector256<ushort> t1 = Avx2.MultiplyHigh(t0.AsUInt16(), shiftAC); | |
// 00000000 00wwwwXX 00000000 00VVVVVV | |
// 00000000 00ttttUU 00000000 00SSSSSS | |
// 00000000 00qqqqRR 00000000 00PPPPPP | |
// 00000000 00nnnnOO 00000000 00MMMMMM | |
// 00000000 00kkkkLL 00000000 00JJJJJJ | |
// 00000000 00hhhhII 00000000 00GGGGGG | |
// 00000000 00eeeeFF 00000000 00DDDDDD | |
// 00000000 00bbbbCC 00000000 00AAAAAA | |
Vector256<short> t3 = Avx2.MultiplyLow(t2.AsInt16(), shiftBB); | |
// 00xxxxxx 00000000 00vvWWWW 00000000 | |
// 00uuuuuu 00000000 00ssTTTT 00000000 | |
// 00rrrrrr 00000000 00ppQQQQ 00000000 | |
// 00oooooo 00000000 00mmNNNN 00000000 | |
// 00llllll 00000000 00jjKKKK 00000000 | |
// 00iiiiii 00000000 00ggHHHH 00000000 | |
// 00ffffff 00000000 00ddEEEE 00000000 | |
// 00cccccc 00000000 00aaBBBB 00000000 | |
str = Avx2.Or(t1.AsSByte(), t3.AsSByte()); | |
// 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV | |
// 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS | |
// 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP | |
// 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM | |
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA | |
// Translation | |
// LUT contains Absolute offset for all ranges: | |
// Translate values 0..63 to the Base64 alphabet. There are five sets: | |
// # From To Abs Index Characters | |
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ | |
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz | |
// 2 [52..61] [48..57] -4 [2..11] 0123456789 | |
// 3 [62] [43] -19 12 + | |
// 4 [63] [47] -16 13 / | |
// Create LUT indices from input: | |
// the index for range #0 is right, others are 1 less than expected: | |
Vector256<byte> indices = Avx2.SubtractSaturate(str.AsByte(), const51); | |
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: | |
Vector256<sbyte> mask = Avx2.CompareGreaterThan(str, const25); | |
// substract -1, so add 1 to indices for range #[1..4], All indices are now correct: | |
Vector256<sbyte> tmp = Avx2.Subtract(indices.AsSByte(), mask); | |
// Add offsets to input values: | |
str = Avx2.Add(str, Avx2.Shuffle(lut, tmp)); | |
Avx.Store(dest, str.AsByte()); | |
src += 24; | |
dest += 32; | |
if (src > srcEnd) | |
{ | |
break; | |
} | |
// Load at src-4, as required by Reshuffle (already shifted by -4) | |
str = Avx.LoadVector256(src).AsSByte(); | |
} | |
srcBytes = src + 4; | |
destBytes = dest; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart) | |
{ | |
// If we have SSSE3 support, pick off 12 bytes at a time for as long as we can. | |
// But because we read 16 bytes at a time, ensure we have enough room to do a | |
// full 16-byte read without segfaulting. | |
// srcBytes, bytes MSB to LSB: | |
// 0 0 0 0 l k j i h g f e d c b a | |
// The JIT won't hoist these "constants", so help it | |
Vector128<sbyte> shuffleVec = ReadVector<Vector128<sbyte>>(s_sseEncodeShuffleVec); | |
Vector128<sbyte> maskAC = Vector128.Create(0x0fc0fc00).AsSByte(); | |
Vector128<sbyte> maskBB = Vector128.Create(0x003f03f0).AsSByte(); | |
Vector128<ushort> shiftAC = Vector128.Create(0x04000040).AsUInt16(); | |
Vector128<short> shiftBB = Vector128.Create(0x01000010).AsInt16(); | |
Vector128<byte> const51 = Vector128.Create((byte)51); | |
Vector128<sbyte> const25 = Vector128.Create((sbyte)25); | |
Vector128<sbyte> lut = ReadVector<Vector128<sbyte>>(s_sseEncodeLut); | |
byte* src = srcBytes; | |
byte* dest = destBytes; | |
//while (remaining >= 16) | |
do | |
{ | |
Vector128<sbyte> str = Sse2.LoadVector128(src).AsSByte(); | |
// Reshuffle | |
str = Ssse3.Shuffle(str, shuffleVec); | |
// str, bytes MSB to LSB: | |
// k l j k | |
// h i g h | |
// e f d e | |
// b c a b | |
Vector128<sbyte> t0 = Sse2.And(str, maskAC); | |
// bits, upper case are most significant bits, lower case are least significant bits | |
// 0000kkkk LL000000 JJJJJJ00 00000000 | |
// 0000hhhh II000000 GGGGGG00 00000000 | |
// 0000eeee FF000000 DDDDDD00 00000000 | |
// 0000bbbb CC000000 AAAAAA00 00000000 | |
Vector128<sbyte> t2 = Sse2.And(str, maskBB); | |
// 00000000 00llllll 000000jj KKKK0000 | |
// 00000000 00iiiiii 000000gg HHHH0000 | |
// 00000000 00ffffff 000000dd EEEE0000 | |
// 00000000 00cccccc 000000aa BBBB0000 | |
Vector128<ushort> t1 = Sse2.MultiplyHigh(t0.AsUInt16(), shiftAC); | |
// 00000000 00kkkkLL 00000000 00JJJJJJ | |
// 00000000 00hhhhII 00000000 00GGGGGG | |
// 00000000 00eeeeFF 00000000 00DDDDDD | |
// 00000000 00bbbbCC 00000000 00AAAAAA | |
Vector128<short> t3 = Sse2.MultiplyLow(t2.AsInt16(), shiftBB); | |
// 00llllll 00000000 00jjKKKK 00000000 | |
// 00iiiiii 00000000 00ggHHHH 00000000 | |
// 00ffffff 00000000 00ddEEEE 00000000 | |
// 00cccccc 00000000 00aaBBBB 00000000 | |
str = Sse2.Or(t1.AsSByte(), t3.AsSByte()); | |
// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ | |
// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG | |
// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD | |
// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA | |
// Translation | |
// LUT contains Absolute offset for all ranges: | |
// Translate values 0..63 to the Base64 alphabet. There are five sets: | |
// # From To Abs Index Characters | |
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ | |
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz | |
// 2 [52..61] [48..57] -4 [2..11] 0123456789 | |
// 3 [62] [43] -19 12 + | |
// 4 [63] [47] -16 13 / | |
// Create LUT indices from input: | |
// the index for range #0 is right, others are 1 less than expected: | |
Vector128<byte> indices = Sse2.SubtractSaturate(str.AsByte(), const51); | |
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: | |
Vector128<sbyte> mask = Sse2.CompareGreaterThan(str, const25); | |
// substract -1, so add 1 to indices for range #[1..4], All indices are now correct: | |
Vector128<sbyte> tmp = Sse2.Subtract(indices.AsSByte(), mask); | |
// Add offsets to input values: | |
str = Sse2.Add(str, Ssse3.Shuffle(lut, tmp)); | |
Sse2.Store(dest, str.AsByte()); | |
src += 12; | |
dest += 16; | |
} | |
while (src <= srcEnd); | |
srcBytes = src; | |
destBytes = dest; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe uint Encode(byte* threeBytes, ref byte encodingMap) | |
{ | |
uint t0 = threeBytes[0]; | |
uint t1 = threeBytes[1]; | |
uint t2 = threeBytes[2]; | |
uint i = (t0 << 16) | (t1 << 8) | t2; | |
uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); | |
uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); | |
uint i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); | |
uint i3 = Unsafe.Add(ref encodingMap, (IntPtr)(i & 0x3F)); | |
return i0 | (i1 << 8) | (i2 << 16) | (i3 << 24); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe uint EncodeAndPadOne(byte* twoBytes, ref byte encodingMap) | |
{ | |
uint t0 = twoBytes[0]; | |
uint t1 = twoBytes[1]; | |
uint i = (t0 << 16) | (t1 << 8); | |
uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); | |
uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); | |
uint i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); | |
return i0 | (i1 << 8) | (i2 << 16) | (EncodingPad << 24); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe uint EncodeAndPadTwo(byte* oneByte, ref byte encodingMap) | |
{ | |
uint t0 = oneByte[0]; | |
uint i = t0 << 8; | |
uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 10)); | |
uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 4) & 0x3F)); | |
return i0 | (i1 << 8) | (EncodingPad << 16) | (EncodingPad << 24); | |
} | |
private const uint EncodingPad = '='; // '=', for padding | |
private const int MaximumEncodeLength = (int.MaxValue / 4) * 3; // 1610612733 | |
// Pre-computing this table using a custom string(s_characters) and GenerateEncodingMapAndVerify (found in tests) | |
private static ReadOnlySpan<byte> s_encodingMap => new byte[] { | |
65, 66, 67, 68, 69, 70, 71, 72, //A..H | |
73, 74, 75, 76, 77, 78, 79, 80, //I..P | |
81, 82, 83, 84, 85, 86, 87, 88, //Q..X | |
89, 90, 97, 98, 99, 100, 101, 102, //Y..Z, a..f | |
103, 104, 105, 106, 107, 108, 109, 110, //g..n | |
111, 112, 113, 114, 115, 116, 117, 118, //o..v | |
119, 120, 121, 122, 48, 49, 50, 51, //w..z, 0..3 | |
52, 53, 54, 55, 56, 57, 45, 95 //4..9, -, _ | |
}; | |
private static ReadOnlySpan<sbyte> s_sseEncodeShuffleVec => new sbyte[] { | |
1, 0, 2, 1, | |
4, 3, 5, 4, | |
7, 6, 8, 7, | |
10, 9, 11, 10 | |
}; | |
private static ReadOnlySpan<sbyte> s_sseEncodeLut => new sbyte[] { | |
65, 71, -4, -4, | |
-4, -4, -4, -4, | |
-4, -4, -4, -4, | |
-19, -16, 0, 0 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxEncodePermuteVec => new sbyte[] { | |
0, 0, 0, 0, | |
0, 0, 0, 0, | |
1, 0, 0, 0, | |
2, 0, 0, 0, | |
3, 0, 0, 0, | |
4, 0, 0, 0, | |
5, 0, 0, 0, | |
6, 0, 0, 0 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxEncodeShuffleVec => new sbyte[] { | |
5, 4, 6, 5, | |
8, 7, 9, 8, | |
11, 10, 12, 11, | |
14, 13, 15, 14, | |
1, 0, 2, 1, | |
4, 3, 5, 4, | |
7, 6, 8, 7, | |
10, 9, 11, 10 | |
}; | |
private static ReadOnlySpan<sbyte> s_avxEncodeLut => new sbyte[] { | |
65, 71, -4, -4, | |
-4, -4, -4, -4, | |
-4, -4, -4, -4, | |
-19, -16, 0, 0, | |
65, 71, -4, -4, | |
-4, -4, -4, -4, | |
-4, -4, -4, -4, | |
-19, -16, 0, 0 | |
}; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment