Last active
September 15, 2021 04:53
-
-
Save leandromoh/7be94e2b9aaeb9b0d375bfca9847cbcc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Positions of the character-by-character CSV scanner.
/// </summary>
private enum State
{
    /// <summary>At the start of a field: nothing of the field has been consumed yet.</summary>
    BeforeField = 0,

    /// <summary>Inside an unquoted field.</summary>
    InField = 1,

    /// <summary>Inside a field that was opened with a double quote.</summary>
    InQuotedField = 2,

    /// <summary>A line terminator has just been consumed.</summary>
    LineEnd = 3,
}
/// <summary>
/// Lazily splits the CSV text supplied by <paramref name="reader"/> into records.
/// </summary>
/// <param name="reader">Source of the CSV text. It is read synchronously, in chunks.</param>
/// <returns>
/// One <see cref="Memory{T}"/> slice per record, without its line terminator
/// (<c>\n</c>, <c>\r</c> or <c>\r\n</c>). Line terminators inside a quoted field do not
/// end the record. Empty lines are yielded as empty slices. A final record that is not
/// followed by a line terminator is yielded as well.
/// </returns>
/// <remarks>
/// Every slice points into a pooled buffer that is overwritten, replaced, or returned to
/// the pool when the enumeration advances or finishes, so a slice must be consumed
/// (or copied) before the next record is requested.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null (raised on first enumeration).</exception>
/// <exception cref="InvalidDataException">
/// A quoted field is followed by unexpected data, or the input ends inside a quoted field.
/// </exception>
public static IEnumerable<Memory<char>> TryReadLine(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    const int InitialBufferSize = 1 << 16;

    var buffer = ArrayPool<char>.Shared.Rent(InitialBufferSize);
    var start = 0;           // index of the first char of the record being scanned
    var end = 0;             // number of valid chars currently held in the buffer
    var endOfInput = false;  // set once the reader reports that no data is left

    // Scans buffer[start..end) for one complete record.
    // Returns true and reports the record length (terminator excluded) and the number
    // of chars to skip (terminator included) when a record was found.
    // Returns false when more data is needed, or when the input is exhausted.
    bool TryScanRecord(out int length, out int consumed)
    {
        length = 0;
        consumed = 0;

        var state = State.BeforeField;
        var pos = start;

        while (pos < end)
        {
            var current = buffer[pos];
            var hasNext = pos + 1 < end;

            if (state == State.InQuotedField)
            {
                if (current != '"')
                {
                    pos++;
                    continue;
                }

                if (!hasNext)
                {
                    if (!endOfInput)
                    {
                        // The char after the quote decides whether it is an escape or
                        // the end of the field; wait until that char is available.
                        return false;
                    }

                    // The closing quote is the very last char of the input.
                    state = State.InField;
                    pos++;
                    continue;
                }

                switch (buffer[pos + 1])
                {
                    case '"':
                        // Escaped quote: skip both chars, stay inside the quoted field.
                        pos += 2;
                        break;
                    case ',':
                        pos += 2;
                        state = State.BeforeField;
                        break;
                    case '\r':
                    case '\n':
                        // Skip the closing quote only; the terminator is handled by the
                        // unquoted branch below on the next iteration.
                        pos++;
                        state = State.InField;
                        break;
                    default:
                        throw new InvalidDataException("Corrupt field found. A double quote is not escaped or there is extra data after a quoted field.");
                }

                continue;
            }

            switch (current)
            {
                case '\r':
                    if (!hasNext && !endOfInput)
                    {
                        // Cannot tell a lone CR from a CRLF pair split across two reads.
                        return false;
                    }

                    length = pos - start;
                    consumed = length + ((hasNext && buffer[pos + 1] == '\n') ? 2 : 1);
                    return true;
                case '\n':
                    length = pos - start;
                    consumed = length + 1;
                    return true;
                case ',':
                    state = State.BeforeField;
                    break;
                case '"':
                    // A quote opens a quoted field only at the start of a field;
                    // inside an unquoted field it is ordinary data.
                    if (state == State.BeforeField)
                    {
                        state = State.InQuotedField;
                    }

                    break;
                default:
                    state = State.InField;
                    break;
            }

            pos++;
        }

        if (!endOfInput)
        {
            return false;
        }

        if (state == State.InQuotedField)
        {
            throw new InvalidDataException("When the line ends with a quoted field, the last character should be an unescaped double quote.");
        }

        // Whatever is left at the end of the input is the last, unterminated record.
        length = end - start;
        consumed = length;
        return length > 0;
    }

    try
    {
        while (true)
        {
            if (TryScanRecord(out var recordLength, out var consumedLength))
            {
                yield return buffer.AsMemory(start, recordLength);
                start += consumedLength;
                continue;
            }

            if (endOfInput)
            {
                yield break;
            }

            if (start > 0)
            {
                // Move the incomplete record to the front to make room for more data.
                var pending = end - start;
                Array.Copy(buffer, start, buffer, 0, pending);
                start = 0;
                end = pending;
            }
            else if (end == buffer.Length)
            {
                // A single record fills the whole buffer: switch to a larger one.
                var larger = ArrayPool<char>.Shared.Rent(checked(buffer.Length * 2));
                Array.Copy(buffer, 0, larger, 0, end);
                ArrayPool<char>.Shared.Return(buffer);
                buffer = larger;
            }

            var read = reader.Read(buffer, end, buffer.Length - end);
            if (read > 0)
            {
                end += read;
            }
            else
            {
                endOfInput = true;
            }
        }
    }
    finally
    {
        // Runs on normal completion, on exceptions, and when the consumer disposes
        // the enumerator early.
        ArrayPool<char>.Shared.Return(buffer);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment