Created
June 19, 2022 05:09
-
-
Save GlaireDaggers/0d37e3b624456a7ed2a958478801d37c to your computer and use it in GitHub Desktop.
C# algorithm for encoding a UTF-32 code point to UTF-8 bytes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Bit-layout constants used by EncodeUTF8.

// Low 11 bits. OR-ing these into a code point maps every value in
// 0xD800..0xDFFF (and no other value) onto MAX_SURROGATE.
private const int SURROGATE_LOW_BITS = 0x7FF;

// Last code point of the surrogate range; surrogates are rejected by EncodeUTF8.
private const int MAX_SURROGATE = 0xDFFF;

// Largest code point EncodeUTF8 accepts; anything above it is rejected.
private const int MAX_FOUR_BYTE = 0x10FFFF;

// A code point with no bits set at or above this position fits in one byte.
private const int ONE_BYTE_BITS = 7;

// A code point with no bits set at or above this position fits in two bytes.
private const int TWO_BYTE_BITS = 11;

// Marker bits (110xxxxx) of the leading byte of a two-byte sequence.
private const int TWO_BYTE_PREFIX = 0xC0;

// A code point with no bits set at or above this position fits in three bytes.
private const int THREE_BYTE_BITS = 16;

// Marker bits (1110xxxx) of the leading byte of a three-byte sequence.
private const int THREE_BYTE_PREFIX = 0xE0;

// Marker bits (11110xxx) of the leading byte of a four-byte sequence.
private const int FOUR_BYTE_PREFIX = 0xF0;

// Marker bits (10xxxxxx) of every continuation byte.
private const int CONTINUATION_BYTE = 0x80;

// Selects the six payload bits carried by a continuation byte.
private const int CONTINUATION_MASK = 0x3F;
/// <summary>
/// Encodes a single UTF-32 code point as UTF-8 and appends the resulting
/// one to four bytes to <paramref name="buffer"/>, leading byte first.
/// </summary>
/// <param name="codepoint">The code point to encode.</param>
/// <param name="buffer">Destination that receives the encoded bytes through <c>Append</c>.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="codepoint"/> lies in the surrogate range (0xD800..0xDFFF)
/// or is greater than 0x10FFFF.
/// </exception>
private void EncodeUTF8(uint codepoint, AppendBuffer<byte> buffer)
{
    // OR-ing in the low 11 bits yields MAX_SURROGATE exactly when the value is
    // inside 0xD800..0xDFFF, so a single comparison covers the whole range.
    if ((codepoint | SURROGATE_LOW_BITS) == MAX_SURROGATE || codepoint > MAX_FOUR_BYTE)
    {
        throw new ArgumentOutOfRangeException(nameof(codepoint));
    }

    if ((codepoint >> ONE_BYTE_BITS) == 0)
    {
        // 0xxxxxxx
        buffer.Append((byte)codepoint);
    }
    else if ((codepoint >> TWO_BYTE_BITS) == 0)
    {
        // 110xxxxx 10xxxxxx
        buffer.Append((byte)(TWO_BYTE_PREFIX | (codepoint >> 6)));
        buffer.Append((byte)(CONTINUATION_BYTE | (codepoint & CONTINUATION_MASK)));
    }
    else if ((codepoint >> THREE_BYTE_BITS) == 0)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        buffer.Append((byte)(THREE_BYTE_PREFIX | (codepoint >> 12)));
        buffer.Append((byte)(CONTINUATION_BYTE | ((codepoint >> 6) & CONTINUATION_MASK)));
        buffer.Append((byte)(CONTINUATION_BYTE | (codepoint & CONTINUATION_MASK)));
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        buffer.Append((byte)(FOUR_BYTE_PREFIX | (codepoint >> 18)));
        buffer.Append((byte)(CONTINUATION_BYTE | ((codepoint >> 12) & CONTINUATION_MASK)));
        buffer.Append((byte)(CONTINUATION_BYTE | ((codepoint >> 6) & CONTINUATION_MASK)));
        buffer.Append((byte)(CONTINUATION_BYTE | (codepoint & CONTINUATION_MASK)));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment