Last active
August 21, 2021 06:25
-
-
Save rinukkusu/8e2c6c8df3c64e129b375e3937bfa9bb to your computer and use it in GitHub Desktop.
Helper class to read CSV files from a ZIP archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using CsvHelper; // see https://joshclose.github.io/CsvHelper/
namespace ZippediZap
{
    /// <summary>
    /// Reads CSV files that are stored inside a ZIP archive. The archive is
    /// either downloaded from a URL or taken from a caller-supplied stream,
    /// and is opened lazily on the first read.
    /// </summary>
    /// <remarks>
    /// Disposing this object also disposes the archive and its underlying
    /// stream, including a stream that was passed to the constructor.
    /// </remarks>
    public class ZippedCsvReader : IDisposable
    {
        // Only created for the URL constructor; stays null when a stream is supplied.
        private readonly HttpClient _httpClient;
        private readonly string _url;
        private Stream _zipDataStream;
        private ZipArchive _zipArchive;

        /// <summary>
        /// Creates a reader that downloads the ZIP archive from <paramref name="url"/>
        /// the first time a file is read.
        /// </summary>
        /// <param name="url">Address of the ZIP archive.</param>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public ZippedCsvReader(string url)
        {
            _url = url ?? throw new ArgumentNullException(nameof(url));
            _httpClient = new HttpClient();
        }

        /// <summary>
        /// Creates a reader on top of a stream that already contains ZIP data.
        /// </summary>
        /// <param name="stream">Stream holding the ZIP archive; disposed together with this reader.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public ZippedCsvReader(Stream stream)
        {
            _zipDataStream = stream ?? throw new ArgumentNullException(nameof(stream));
        }

        /// <summary>
        /// Opens the ZIP archive once; subsequent calls are no-ops.
        /// When a URL was given, the archive data is fetched first.
        /// </summary>
        private async Task LoadArchiveAsync()
        {
            if (_zipArchive != null)
            {
                return;
            }

            if (_url != null)
            {
                _zipDataStream = await _httpClient.GetStreamAsync(_url).ConfigureAwait(false);
            }

            _zipArchive = new ZipArchive(_zipDataStream, ZipArchiveMode.Read);
        }

        /// <summary>
        /// Looks up an entry in the loaded archive.
        /// </summary>
        /// <param name="zippedFilename">Path of the entry inside the archive.</param>
        /// <exception cref="FileNotFoundException">The archive has no such entry.</exception>
        private ZipArchiveEntry GetEntry(string zippedFilename)
        {
            var entry = _zipArchive.GetEntry(zippedFilename);
            if (entry == null)
            {
                throw new FileNotFoundException(
                    "File could not be found in Zip archive.",
                    zippedFilename);
            }

            return entry;
        }

        /// <summary>
        /// Opens the given archive entry as CSV and hands the reader to
        /// <paramref name="readDelegate"/>.
        /// </summary>
        /// <remarks>
        /// The CSV reader is disposed as soon as this method returns, so the
        /// delegate must finish all reading before it returns its result.
        /// </remarks>
        private async Task<T> ReadAsync<T>(string zippedFilename, Func<CsvReader, T> readDelegate, string csvDelimiter = ";")
        {
            await LoadArchiveAsync().ConfigureAwait(false);

            var entry = GetEntry(zippedFilename);

            using var streamReader = new StreamReader(entry.Open());
            using var csvReader = new CsvReader(streamReader, CultureInfo.InvariantCulture);
            csvReader.Configuration.Delimiter = csvDelimiter;

            return readDelegate(csvReader);
        }

        /// <summary>
        /// Reads the header and the first data record of a CSV file in the archive.
        /// </summary>
        /// <typeparam name="T">Type the record is mapped to.</typeparam>
        /// <param name="zippedFilename">Path of the CSV file inside the archive.</param>
        /// <param name="csvDelimiter">Field delimiter used by the CSV file.</param>
        /// <returns>The first data record.</returns>
        /// <exception cref="FileNotFoundException">The archive has no such entry.</exception>
        public Task<T> ReadFirstLine<T>(string zippedFilename, string csvDelimiter = ";")
        {
            return ReadAsync(zippedFilename, reader =>
            {
                reader.Read();       // set head to first line
                reader.ReadHeader(); // read header
                reader.Read();       // set head to second line (actual data)
                return reader.GetRecord<T>();
            }, csvDelimiter);
        }

        /// <summary>
        /// Reads every record of a CSV file in the archive.
        /// </summary>
        /// <typeparam name="T">Type each record is mapped to.</typeparam>
        /// <param name="zippedFilename">Path of the CSV file inside the archive.</param>
        /// <param name="csvDelimiter">Field delimiter used by the CSV file.</param>
        /// <returns>All records, fully loaded into memory.</returns>
        /// <exception cref="FileNotFoundException">The archive has no such entry.</exception>
        public Task<IEnumerable<T>> ReadAllLines<T>(string zippedFilename, string csvDelimiter = ";")
        {
            // GetRecords yields lazily; materialize here, while the reader is
            // still open, otherwise the caller would enumerate a disposed reader.
            return ReadAsync<IEnumerable<T>>(
                zippedFilename,
                reader => reader.GetRecords<T>().ToList(),
                csvDelimiter);
        }

        /// <summary>
        /// Releases the HTTP client (if any), the archive and the underlying stream.
        /// </summary>
        public void Dispose()
        {
            _httpClient?.Dispose();
            _zipArchive?.Dispose();
            _zipDataStream?.Dispose();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good point, fixed!