Skip to content

Instantly share code, notes, and snippets.

@MisinformedDNA
Created September 9, 2016 20:54
Show Gist options
  • Save MisinformedDNA/f06534dc15428d2b31de0bbbbadff848 to your computer and use it in GitHub Desktop.
Save MisinformedDNA/f06534dc15428d2b31de0bbbbadff848 to your computer and use it in GitHub Desktop.
"No header record was found" on Azure Data Lake
using CsvHelper;
using Microsoft.Analytics.Interfaces;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SomeNamespace
{
    /// <summary>
    /// U-SQL extractor that reads the input stream with CsvHelper and emits the
    /// first field of every data record as output column 0.
    /// </summary>
    public class CustomExtractor : CsvExtractor
    {
        /// <summary>
        /// Reads CSV records from <paramref name="input"/> and yields one row per
        /// data record.
        /// </summary>
        /// <param name="input">Unstructured input whose <c>BaseStream</c> holds the CSV text.</param>
        /// <param name="outputrow">Row buffer; column 0 receives the first field of each emitted record.</param>
        /// <returns>
        /// A lazily evaluated sequence of read-only rows. Empty records, records whose
        /// first field contains "XYZ", and records whose first field contains "AB" or
        /// "BC" are not emitted.
        /// </returns>
        public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow outputrow)
        {
            // NOTE(review): this value is updated from "AB"/"BC" records but never read
            // in the code shown here; presumably the full implementation writes it to
            // an output column. Confirm, or remove it.
            string someValue = "QRS";

            using (var reader = new StreamReader(input.BaseStream))
            using (var csvReader = new CsvReader(reader))
            {
                // CsvHelper assumes a header record by default: it consumes the first
                // record as the header and throws "No header record was found" when
                // the stream contains no records at all. Every record is handled as
                // data below, so header handling must be switched off.
                csvReader.Configuration.HasHeaderRecord = false;

                while (csvReader.Read())
                {
                    if (csvReader.IsRecordEmpty())
                    {
                        continue;
                    }

                    var tokens = csvReader.CurrentRecord;

                    // Defensive: never index into a missing or zero-length record.
                    if (tokens == null || tokens.Length == 0)
                    {
                        continue;
                    }

                    var firstField = tokens[0];

                    // Records marked "XYZ" carry no data and are skipped.
                    if (firstField.Contains("XYZ"))
                    {
                        continue;
                    }

                    // "AB"/"BC" records update the carried value and are not emitted.
                    if (firstField.Contains("AB") || firstField.Contains("BC"))
                    {
                        someValue = firstField;
                        continue;
                    }

                    outputrow.Set(0, firstField);
                    yield return outputrow.AsReadOnly();
                }
            }
        }
    }
}
// Runs SomeNamespace.CustomExtractor over the input file and writes the
// extracted column back out as CSV.
REFERENCE ASSEMBLY master.CsvHelper;
REFERENCE ASSEMBLY master.SomeAssembly;

DECLARE @in = @"C:\SomeInput.csv";
// The output path needs its own variable; it was previously declared as a
// second @in, leaving @out (used by OUTPUT below) undefined.
DECLARE @out = @"C:\SomeOutput.csv";

@DATA =
    EXTRACT someColumn string
    FROM @in
    USING new SomeNamespace.CustomExtractor();

OUTPUT @DATA
TO @out
USING Outputters.Csv();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment