Last active
March 4, 2020 02:07
-
-
Save jbtule/5f417bdb1ffa41059aa42db43d7b2025 to your computer and use it in GitHub Desktop.
CSVReader.ReadCSVFile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
CSVReader.ReadCSVFile | |
Ported from FSharp.Data into C# | |
Changes by James Tuley 3/3/2020 | |
* Ported into C# | |
* Removes recursion and adds mutation (since there is no tail-call optimization in C#) | |
Original Code: | |
Copyright 2011-2017, Tomas Petricek (http://tomasp.net), | |
Gustavo Guerra (http://functionalflow.co.uk), and other contributors | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, | |
software distributed under the License is distributed | |
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, | |
either express or implied. See the License for the specific language | |
governing permissions and limitations under the License. | |
*/ | |
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
public static partial class CSVReader {
    /// <summary>
    /// Lazily parses delimited text from <paramref name="reader"/> into rows of cells.
    /// </summary>
    /// <param name="reader">Source of the text; it is read one character at a time and is not disposed here.</param>
    /// <param name="separators">Every character in this string is treated as a cell separator.</param>
    /// <param name="quote">
    /// Quote character. Text between quotes is taken literally (separators and line breaks included),
    /// and two consecutive quote characters inside a quoted section produce one literal quote.
    /// </param>
    /// <returns>
    /// One tuple per non-blank row: the row's cells and a 1-based row counter. Blank lines are skipped
    /// and do not advance the counter, and a quoted cell that spans several physical lines still
    /// belongs to a single row, so <c>LineNumber</c> is a row index rather than a physical line number.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="separators"/> is null. Thrown at call time,
    /// not on first enumeration.
    /// </exception>
    public static IEnumerable<(IList<string> Data, int LineNumber)> ReadCSVFile(TextReader reader, String separators, char quote) {
        // Validate here, outside the iterator, so a bad argument fails at the call site
        // instead of when the caller eventually starts enumerating.
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }
        if (separators == null)
        {
            throw new ArgumentNullException(nameof(separators));
        }

        // Stored as ints so values returned by TextReader.Read() can be looked up without a cast.
        var separatorSet = new HashSet<int>(separators.Select(c => (int)c));

        // Reads the remainder of a quoted section; the opening quote has already been consumed.
        string ReadString()
        {
            var chars = new StringBuilder();
            while (true)
            {
                int next = reader.Read();
                if (next == -1)
                {
                    // End of input inside a quoted section: return what was collected.
                    return chars.ToString();
                }
                if (next == quote)
                {
                    if (reader.Peek() != quote)
                    {
                        // A lone quote closes the section.
                        return chars.ToString();
                    }
                    // A doubled quote is an escaped literal quote: consume the second one.
                    reader.Read();
                    chars.Append(quote);
                }
                else
                {
                    chars.Append((char)next);
                }
            }
        }

        // Reads one row, consuming its terminating '\r' or '\n' (or stopping at end of input).
        List<string> ReadLine()
        {
            var cells = new List<string>();
            var cell = new StringBuilder();
            while (true)
            {
                int next = reader.Read();
                if (next == -1 || next == '\r' || next == '\n')
                {
                    cells.Add(cell.ToString());
                    return cells;
                }
                if (separatorSet.Contains(next))
                {
                    cells.Add(cell.ToString());
                    cell.Clear();
                }
                else if (next == quote)
                {
                    cell.Append(ReadString());
                }
                else
                {
                    cell.Append((char)next);
                }
            }
        }

        IEnumerable<(IList<string> Data, int LineNumber)> ReadRows()
        {
            int lineNumber = 1;
            while (true)
            {
                int upcoming = reader.Peek();
                if (upcoming == -1)
                {
                    yield break;
                }
                if (upcoming == '\r' || upcoming == '\n')
                {
                    // Blank line, or the '\n' half of a "\r\n" pair left behind by ReadLine.
                    reader.Read();
                    continue;
                }
                IList<string> row = ReadLine();
                yield return (row, lineNumber++);
            }
        }

        return ReadRows();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
CSVReader.ReadCSVFileAsync | |
Ported from FSharp.Data into C# | |
Changes by James Tuley 3/3/2020 | |
* Ported into C# | |
* Removes recursion and adds mutation (since there is no tail-call optimization in C#) | |
* Converted to Async | |
Original Code: | |
Copyright 2011-2017, Tomas Petricek (http://tomasp.net), | |
Gustavo Guerra (http://functionalflow.co.uk), and other contributors | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, | |
software distributed under the License is distributed | |
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, | |
either express or implied. See the License for the specific language | |
governing permissions and limitations under the License. | |
*/ | |
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Runtime.CompilerServices; | |
using System.Text; | |
using System.Threading; | |
using System.Threading.Tasks; | |
public static partial class CSVReader {
    /// <summary>
    /// Asynchronously and lazily parses delimited text from <paramref name="reader"/> into rows of cells.
    /// </summary>
    /// <param name="reader">Source of the text; it is read one character at a time and is not disposed here.</param>
    /// <param name="separators">Every character in this string is treated as a cell separator.</param>
    /// <param name="quote">
    /// Quote character. Text between quotes is taken literally (separators and line breaks included),
    /// and two consecutive quote characters inside a quoted section produce one literal quote.
    /// </param>
    /// <param name="cancellationToken">Checked before every character read and passed to the reader.</param>
    /// <returns>
    /// One tuple per non-blank row: the row's cells and a 1-based row counter. Blank lines are skipped
    /// and do not advance the counter, and a quoted cell that spans several physical lines still
    /// belongs to a single row, so <c>LineNumber</c> is a row index rather than a physical line number.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="separators"/> is null. Because this is an async
    /// iterator, the exception surfaces on the first <c>MoveNextAsync</c>.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled. A partially read row is never yielded.
    /// </exception>
    public static async IAsyncEnumerable<(IList<string> Data, int LineNumber)>
        ReadCSVFileAsync(TextReader reader, String separators, char quote, [EnumeratorCancellation] CancellationToken cancellationToken = default) {
        if (reader == null)
        {
            throw new ArgumentNullException(nameof(reader));
        }
        if (separators == null)
        {
            throw new ArgumentNullException(nameof(separators));
        }

        // Stored as ints so the values produced by ReadAsync() can be looked up without a cast.
        var separatorSet = new HashSet<int>(separators.Select(c => (int)c));
        var buff = new char[1];

        // Reads one character, returning -1 at end of input (mirrors TextReader.Read()).
        // Cancellation is checked here so every loop below stops by throwing rather than
        // by quietly returning partial data.
        async Task<int> ReadAsync()
        {
            cancellationToken.ThrowIfCancellationRequested();
            int count = await reader.ReadAsync(buff, cancellationToken).ConfigureAwait(false);
            return count == 0 ? -1 : buff[0];
        }

        // Reads the remainder of a quoted section; the opening quote has already been consumed.
        // Telling a closing quote from an escaped one requires reading one character ahead,
        // so that look-ahead character is handed back to the caller as NextChar.
        async Task<(string QuotedText, int NextChar)> ReadStringAsync()
        {
            var chars = new StringBuilder();
            while (true)
            {
                int current = await ReadAsync().ConfigureAwait(false);
                if (current == -1)
                {
                    // End of input inside a quoted section: return what was collected.
                    return (chars.ToString(), -1);
                }
                if (current == quote)
                {
                    int following = await ReadAsync().ConfigureAwait(false);
                    if (following != quote)
                    {
                        return (chars.ToString(), following);
                    }
                    // A doubled quote is an escaped literal quote.
                    chars.Append(quote);
                }
                else
                {
                    chars.Append((char)current);
                }
            }
        }

        // Reads one row starting at the already-read character `current`; the row's
        // terminating '\r' or '\n' is the last character consumed.
        async Task<List<string>> ReadLineAsync(int current)
        {
            var cells = new List<string>();
            var cell = new StringBuilder();
            while (true)
            {
                if (current == -1 || current == '\r' || current == '\n')
                {
                    cells.Add(cell.ToString());
                    return cells;
                }
                if (separatorSet.Contains(current))
                {
                    cells.Add(cell.ToString());
                    cell.Clear();
                    current = await ReadAsync().ConfigureAwait(false);
                }
                else if (current == quote)
                {
                    var (quotedText, nextChar) = await ReadStringAsync().ConfigureAwait(false);
                    cell.Append(quotedText);
                    current = nextChar;
                }
                else
                {
                    cell.Append((char)current);
                    current = await ReadAsync().ConfigureAwait(false);
                }
            }
        }

        int lineNumber = 1;
        int readChar = await ReadAsync().ConfigureAwait(false);
        while (readChar != -1)
        {
            // A '\r' or '\n' seen here is a blank line, or the '\n' half of a "\r\n" pair.
            if (readChar != '\r' && readChar != '\n')
            {
                IList<string> row = await ReadLineAsync(readChar).ConfigureAwait(false);
                yield return (row, lineNumber++);
            }
            readChar = await ReadAsync().ConfigureAwait(false);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment