Skip to content

Instantly share code, notes, and snippets.

@tintoy
Last active February 27, 2018 22:20
Show Gist options
  • Select an option

  • Save tintoy/640f43519754ce4ac20ac2f3d4b5aee9 to your computer and use it in GitHub Desktop.

Select an option

Save tintoy/640f43519754ce4ac20ac2f3d4b5aee9 to your computer and use it in GitHub Desktop.
Decode UTF-8 from stream (can handle characters that span buffer boundary)
// Reads UTF-8 text from `stream` in small chunks and writes each complete line to the console.
// Lines are terminated by LF; every CR is discarded, so CRLF-terminated input yields the same
// lines as LF-terminated input (note that a lone CR in the middle of a line is dropped too).
StringBuilder lineBuilder = new StringBuilder();
Encoding encoding = Encoding.UTF8;

// A Decoder is stateful; it remembers if it's halfway through decoding a multi-byte character,
// which is what lets a character span two consecutive reads.
Decoder decoder = encoding.GetDecoder();

byte[] buffer = new byte[10];

// Worst-case character count for one full byte buffer (including any bytes the decoder is still
// holding from the previous read). Allocating once avoids a GetCharCount pre-scan and a fresh
// array on every read.
char[] decodeBuffer = new char[encoding.GetMaxCharCount(buffer.Length)];

const char CR = '\r';
const char LF = '\n';

int bytesRead;
do
{
    bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length);

    // On the final (zero-byte) read, flush the decoder so that an incomplete multi-byte
    // sequence at the end of the stream is reported via the encoding's fallback
    // instead of being silently dropped.
    bool endOfStream = bytesRead == 0;
    int charactersDecoded = decoder.GetChars(buffer, 0, bytesRead, decodeBuffer, 0, flush: endOfStream);

    for (int charIndex = 0; charIndex < charactersDecoded; charIndex++)
    {
        char decodedCharacter = decodeBuffer[charIndex];
        switch (decodedCharacter)
        {
            case CR:
            {
                // Discarded: the LF that follows (possibly in the next chunk) ends the line.
                break;
            }
            case LF:
            {
                string line = lineBuilder.ToString();
                lineBuilder.Clear();
                Console.WriteLine(line);
                break;
            }
            default:
            {
                lineBuilder.Append(decodedCharacter);
                break;
            }
        }
    }
}
while (bytesRead > 0);

// If stream doesn't end with a line-terminator sequence, publish trailing characters as the last line.
if (lineBuilder.Length > 0)
{
    Console.WriteLine(lineBuilder.ToString());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment