Skip to content

Instantly share code, notes, and snippets.

@leandromoh
Last active September 15, 2021 04:53
Show Gist options
  • Save leandromoh/7be94e2b9aaeb9b0d375bfca9847cbcc to your computer and use it in GitHub Desktop.
Save leandromoh/7be94e2b9aaeb9b0d375bfca9847cbcc to your computer and use it in GitHub Desktop.
/// <summary>
/// Scanner states used while splitting CSV text into records.
/// </summary>
private enum State
{
    /// <summary>
    /// At the start of a field: the state every record begins in, and the
    /// state re-entered after a field-separating comma.
    /// </summary>
    BeforeField = 0,

    /// <summary>
    /// Inside an unquoted field (its first character was not a quote,
    /// a comma or a line break).
    /// </summary>
    InField = 1,

    /// <summary>
    /// Inside a field that was opened with a double quote.
    /// </summary>
    InQuotedField = 2,

    /// <summary>
    /// A line break has just terminated the current record.
    /// </summary>
    LineEnd = 3,
}
/// <summary>
/// Lazily splits the CSV text supplied by <paramref name="reader"/> into records.
/// A record normally ends at a line break (<c>\n</c>, <c>\r</c> or <c>\r\n</c>);
/// line breaks inside a double-quoted field belong to the field and do not end the record.
/// </summary>
/// <param name="reader">Source of the CSV text. It is read in blocks and is not disposed.</param>
/// <returns>
/// One <see cref="Memory{T}"/> per record, containing the raw record text without its
/// line terminator. Each slice points into a pooled buffer that is reused: it is only
/// valid until the enumerator is advanced again or disposed, so copy it if it must be kept.
/// A blank line at the very start of the input is skipped; later blank lines are
/// returned as empty slices. A final record without a trailing line break is returned too.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>
/// (raised when enumeration starts, because this method is an iterator).</exception>
/// <exception cref="InvalidDataException">
/// A closing quote is followed by something other than a quote, a comma, a line break
/// or the end of the input, or the input ends inside a quoted field.
/// </exception>
public static IEnumerable<Memory<char>> TryReadLine(TextReader reader)
{
    if (reader is null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    const int InitialCapacity = 1 << 16;

    var buffer = ArrayPool<char>.Shared.Rent(InitialCapacity);
    try
    {
        var length = 0;              // number of valid chars currently held in buffer
        var recordStart = 0;         // index of the first char of the record being scanned
        var pos = 0;                 // index of the next char to examine
        var contentLength = 0;       // record length without terminator, relative to recordStart
        var endOfInput = false;      // set once the reader reports no more data
        var isFirstRecord = true;
        var state = State.BeforeField;

        // Makes sure buffer[pos] is readable, pulling more text from the reader when the
        // buffered data is exhausted. The unfinished record is moved to the front of the
        // buffer (so recordStart and pos are shifted together) and the buffer is enlarged
        // when a single record already fills it. Returns false only at end of input.
        bool HasChar()
        {
            if (pos < length)
            {
                return true;
            }

            if (endOfInput)
            {
                return false;
            }

            var pending = length - recordStart;
            if (pending == buffer.Length)
            {
                // The current record occupies the whole buffer: a zero-length read would
                // look like end of input, so switch to a larger pooled array instead.
                var larger = ArrayPool<char>.Shared.Rent(checked(buffer.Length * 2));
                Array.Copy(buffer, recordStart, larger, 0, pending);
                var previous = buffer;
                buffer = larger;
                ArrayPool<char>.Shared.Return(previous);
            }
            else if (recordStart > 0)
            {
                Array.Copy(buffer, recordStart, buffer, 0, pending);
            }

            pos -= recordStart;
            recordStart = 0;

            var read = reader.Read(buffer, pending, buffer.Length - pending);
            length = pending + read;
            endOfInput = read == 0;
            return read > 0;
        }

        // Called right after the first terminator char was consumed (pos is just past it).
        // The content length is captured before looking ahead because HasChar may move data.
        void EndRecord(char terminator)
        {
            contentLength = pos - 1 - recordStart;
            if (terminator == '\r' && HasChar() && buffer[pos] == '\n')
            {
                pos++; // treat "\r\n" as a single terminator
            }

            state = State.LineEnd;
        }

        while (true)
        {
            recordStart = pos;
            contentLength = 0;
            state = State.BeforeField;

            while (state != State.LineEnd && HasChar())
            {
                var c = buffer[pos++];
                switch (state)
                {
                    case State.BeforeField:
                    case State.InField:
                        if (c == '\r' || c == '\n')
                        {
                            EndRecord(c);
                        }
                        else if (c == ',')
                        {
                            state = State.BeforeField;
                        }
                        else if (c == '"' && state == State.BeforeField)
                        {
                            // A quote only opens a quoted field when it is the field's first char.
                            state = State.InQuotedField;
                        }
                        else
                        {
                            state = State.InField;
                        }

                        break;

                    case State.InQuotedField:
                        if (c != '"')
                        {
                            break;
                        }

                        if (!HasChar())
                        {
                            // The closing quote is the last char of the input: the field is
                            // complete and the end-of-input handling below emits the record.
                            state = State.InField;
                            break;
                        }

                        var next = buffer[pos++];
                        if (next == '"')
                        {
                            // Escaped quote ("") - still inside the quoted field.
                        }
                        else if (next == ',')
                        {
                            state = State.BeforeField;
                        }
                        else if (next == '\r' || next == '\n')
                        {
                            EndRecord(next);
                        }
                        else
                        {
                            throw new InvalidDataException("Corrupt field found. A double quote is not escaped or there is extra data after a quoted field.");
                        }

                        break;

                    default:
                        throw new NotImplementedException();
                }
            }

            if (state == State.LineEnd)
            {
                var skipLeadingBlankLine = isFirstRecord && contentLength == 0;
                isFirstRecord = false;
                if (!skipLeadingBlankLine)
                {
                    yield return buffer.AsMemory(recordStart, contentLength);
                }

                continue;
            }

            // The reader is exhausted and no terminator was found for the pending text.
            if (state == State.InQuotedField)
            {
                throw new InvalidDataException("When the line ends with a quoted field, the last character should be an unescaped double quote.");
            }

            if (pos > recordStart)
            {
                yield return buffer.AsMemory(recordStart, pos - recordStart);
            }

            yield break;
        }
    }
    finally
    {
        // Runs on normal completion, on an exception, and when the consumer disposes the
        // enumerator early (e.g. leaves a foreach), so the pooled array is always returned.
        ArrayPool<char>.Shared.Return(buffer);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment