Skip to content

Instantly share code, notes, and snippets.

@jbtule
Last active March 4, 2020 02:07
Show Gist options
  • Save jbtule/5f417bdb1ffa41059aa42db43d7b2025 to your computer and use it in GitHub Desktop.
Save jbtule/5f417bdb1ffa41059aa42db43d7b2025 to your computer and use it in GitHub Desktop.
CSVReader.ReadCSVFile
/*
CSVReader.ReadCSVFile
Ported from FSharp.Data into C#
Changes by James Tuley 3/3/2020
* Ported into C#
* Removes recursion and adds mutation (since there is no tail-call optimization in C#)
Original Code:
Copyright 2011-2017, Tomas Petricek (http://tomasp.net),
Gustavo Guerra (http://functionalflow.co.uk), and other contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language
governing permissions and limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
public static partial class CSVReader {
    /// <summary>
    /// Lazily parses delimiter-separated text from <paramref name="reader"/> and yields one entry per non-empty row.
    /// </summary>
    /// <param name="reader">Source of characters. It is read on demand while the result is enumerated and is not disposed here.</param>
    /// <param name="separators">Every character of this string is treated as a cell separator.</param>
    /// <param name="quote">Character that opens and closes a quoted section; a doubled quote inside a quoted section yields one literal quote.</param>
    /// <returns>
    /// A lazy sequence of (cells, number) pairs. <c>LineNumber</c> starts at 1 and is incremented once per yielded row,
    /// so it counts rows rather than physical lines: blank lines are skipped without advancing it, and line breaks
    /// inside quoted sections do not advance it either.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="separators"/> is null. Thrown at call time, not on first enumeration.
    /// </exception>
    public static IEnumerable<(IList<string> Data, int LineNumber)> ReadCSVFile(TextReader reader, String separators, char quote) {
        // Validate eagerly: an iterator body would defer these checks until the first MoveNext.
        if (reader is null) {
            throw new ArgumentNullException(nameof(reader));
        }
        if (separators is null) {
            throw new ArgumentNullException(nameof(separators));
        }

        // Stored as ints so the values returned by TextReader.Read() can be looked up without a cast.
        var separatorSet = new HashSet<int>(separators.ToCharArray().Select(Convert.ToInt32));
        return ReadRows();

        // Reads the remainder of a quoted section; the opening quote has already been consumed.
        // An unterminated section simply ends at end of input.
        string ReadString() {
            var chars = new StringBuilder();
            while (true) {
                switch (reader.Read()) {
                    case -1:
                        return chars.ToString();
                    case var q when q == quote && reader.Peek() == quote:
                        // Doubled quote: consume the second one and keep a single literal quote.
                        reader.Read();
                        chars.Append(quote);
                        break;
                    case var q when q == quote:
                        return chars.ToString();
                    case var c:
                        chars.Append((char)c);
                        break;
                }
            }
        }

        // Yields the cells of one row, consuming input up to and including the first
        // '\r', '\n' or end of input found outside a quoted section.
        IEnumerable<string> ReadLine() {
            var cell = new StringBuilder();
            while (true) {
                switch (reader.Read()) {
                    case -1:
                    case '\r':
                    case '\n':
                        yield return cell.ToString();
                        yield break;
                    case var s when separatorSet.Contains(s):
                        yield return cell.ToString();
                        cell.Clear();
                        break;
                    case var q when q == quote:
                        // Quoted text is appended to the current cell, so it concatenates with adjacent unquoted text.
                        cell.Append(ReadString());
                        break;
                    case var c:
                        cell.Append((char)c);
                        break;
                }
            }
        }

        // Drives the parse: skips line-break characters between rows and numbers each yielded row.
        IEnumerable<(IList<string> Data, int LineNumber)> ReadRows() {
            int lineNumber = 1;
            while (true) {
                switch (reader.Peek()) {
                    case -1:
                        yield break;
                    case '\r':
                    case '\n':
                        // Leftover half of "\r\n" or a blank line: discard it.
                        reader.Read();
                        break;
                    default:
                        yield return (ReadLine().ToList(), lineNumber++);
                        break;
                }
            }
        }
    }
}
/*
CSVReader.ReadCSVFileAsync
Ported from FSharp.Data into C#
Changes by James Tuley 3/3/2020
* Ported into C#
* Removes recursion and adds mutation (since there is no tail-call optimization in C#)
* Converted to Async
Original Code:
Copyright 2011-2017, Tomas Petricek (http://tomasp.net),
Gustavo Guerra (http://functionalflow.co.uk), and other contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language
governing permissions and limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
public static partial class CSVReader {
    /// <summary>
    /// Asynchronously parses delimiter-separated text from <paramref name="reader"/> and yields one entry per non-empty row.
    /// </summary>
    /// <param name="reader">Source of characters. It is read one character at a time while the result is enumerated and is not disposed here.</param>
    /// <param name="separators">Every character of this string is treated as a cell separator.</param>
    /// <param name="quote">Character that opens and closes a quoted section; a doubled quote inside a quoted section yields one literal quote.</param>
    /// <param name="cancellationToken">Checked before every read and also passed to the reader.</param>
    /// <returns>
    /// An async sequence of (cells, number) pairs. <c>LineNumber</c> starts at 1 and is incremented once per yielded row,
    /// so it counts rows rather than physical lines: blank lines are skipped without advancing it, and line breaks
    /// inside quoted sections do not advance it either.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="separators"/> is null. Because this is an async iterator,
    /// the exception surfaces on the first MoveNextAsync rather than at call time.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled. A partially parsed row is never yielded.
    /// </exception>
    public static async IAsyncEnumerable<(IList<string> Data, int LineNumber)>
    ReadCSVFileAsync(TextReader reader, String separators, char quote, [EnumeratorCancellation] CancellationToken cancellationToken = default) {
        if (reader is null) {
            throw new ArgumentNullException(nameof(reader));
        }
        if (separators is null) {
            throw new ArgumentNullException(nameof(separators));
        }

        // Stored as ints so the values produced by ReadAsync() can be looked up without a cast.
        var separatorSet = new HashSet<int>(separators.ToCharArray().Select(Convert.ToInt32));
        var buff = new char[1];

        // Reads one character, or -1 at end of input (mirrors TextReader.Read()).
        // Cancellation is raised here, before the read, so no parsing loop can end quietly
        // with a half-built row even when the reader itself ignores the token.
        async Task<int> ReadAsync() {
            cancellationToken.ThrowIfCancellationRequested();
            return await reader.ReadAsync(buff, cancellationToken) switch { 0 => -1, _ => buff[0] };
        }

        // Reads the remainder of a quoted section; the opening quote has already been consumed.
        // Detecting a doubled quote requires reading one character past the closing quote, so that
        // look-ahead character (or -1) is handed back to the caller as NextChar.
        async Task<(string QuotedText, int NextChar)> ReadStringAsync() {
            var chars = new StringBuilder();
            while (true) {
                var current = await ReadAsync();
                if (current == -1) {
                    // Unterminated quoted section: end it at end of input.
                    return (chars.ToString(), -1);
                }
                if (current == quote) {
                    var following = await ReadAsync();
                    if (following != quote) {
                        return (chars.ToString(), following);
                    }
                    chars.Append(quote);
                } else {
                    chars.Append((char)current);
                }
            }
        }

        // Collects the cells of one row, starting with the already-read character firstChar and
        // consuming input up to and including the first '\r', '\n' or end of input outside quotes.
        async Task<List<string>> ReadRowAsync(int firstChar) {
            var cells = new List<string>();
            var cell = new StringBuilder();
            var current = firstChar;
            while (true) {
                switch (current) {
                    case -1:
                    case '\r':
                    case '\n':
                        cells.Add(cell.ToString());
                        return cells;
                    case var s when separatorSet.Contains(s):
                        cells.Add(cell.ToString());
                        cell.Clear();
                        current = await ReadAsync();
                        break;
                    case var q when q == quote:
                        // Quoted text is appended to the current cell, so it concatenates with adjacent unquoted text.
                        var (quotedText, nextChar) = await ReadStringAsync();
                        cell.Append(quotedText);
                        current = nextChar;
                        break;
                    default:
                        cell.Append((char)current);
                        current = await ReadAsync();
                        break;
                }
            }
        }

        int lineNumber = 1;
        while (true) {
            var rowStart = await ReadAsync();
            switch (rowStart) {
                case -1:
                    yield break;
                case '\r':
                case '\n':
                    // Leftover half of "\r\n" or a blank line: skip it.
                    break;
                default:
                    yield return (await ReadRowAsync(rowStart), lineNumber++);
                    break;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment