Skip to content

Instantly share code, notes, and snippets.

@pldmgg
Last active August 9, 2017 17:35
Show Gist options
  • Save pldmgg/242de93928dee58ab69721d5afed0849 to your computer and use it in GitHub Desktop.
Save pldmgg/242de93928dee58ab69721d5afed0849 to your computer and use it in GitHub Desktop.
CSVManipulation
/// <summary>
/// CSV utilities built on CsvHelper: de-duplicating the rows of two files and
/// joining two files on their Type and Reference columns.
/// </summary>
/// <remarks>
/// Path validation failures are reported on the console and terminate the process
/// with exit code 3 (see https://msdn.microsoft.com/en-us/library/ms681382(v=vs.85)).
/// </remarks>
public class Manipulation
{
    // Exit code used when a required path is missing.
    // For exit codes, see: https://msdn.microsoft.com/en-us/library/ms681382(v=vs.85)
    private const int PathNotFoundExitCode = 3;

    /// <summary>
    /// Terminates the process with exit code 3 when <paramref name="source"/> is not an existing file.
    /// </summary>
    /// <param name="source">Path of the CSV file that is about to be read.</param>
    private static void ValidateSource(string source)
    {
        if (File.Exists(source))
        {
            return;
        }

        Console.WriteLine("Unable to find " + source + "!");
        Environment.Exit(PathNotFoundExitCode);
    }

    /// <summary>
    /// Terminates the process with exit code 3 when the directory that should contain
    /// <paramref name="dest"/> does not exist.
    /// </summary>
    /// <param name="dest">Path of the file that is about to be written.</param>
    private static void ValidateDestination(string dest)
    {
        string parentDir = Path.GetDirectoryName(dest);

        // An empty (non-null) directory name means dest is a bare file name, which
        // StreamWriter resolves against the current directory - that is a valid target.
        if (parentDir != null && (parentDir.Length == 0 || Directory.Exists(parentDir)))
        {
            return;
        }

        Console.WriteLine("Unable to find " + parentDir + "!");
        Environment.Exit(PathNotFoundExitCode);
    }

    /// <summary>
    /// Returns the header fields reported by CsvHelper for <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Path of the CSV file to inspect.</param>
    /// <returns>The value of <c>CsvReader.FieldHeaders</c>.</returns>
    private static string[] GetHeaders(string source)
    {
        ValidateSource(source);

        using (var reader = new StreamReader(source))
        using (var csv = new CsvReader(reader))
        {
            // NOTE(review): some CsvHelper versions require Read() to be called before
            // FieldHeaders is populated - confirm against the referenced package version.
            return csv.FieldHeaders;
        }
    }

    /// <summary>
    /// Reads every row of <paramref name="source"/> into memory.
    /// </summary>
    /// <param name="source">Path of the CSV file to read.</param>
    /// <returns>All rows of the file, fully materialized.</returns>
    private static IEnumerable<CSVRowObj> GetRecords(string source)
    {
        ValidateSource(source);

        using (var reader = new StreamReader(source))
        using (var csv = new CsvReader(reader))
        {
            // The sequence must be materialized BEFORE the reader is disposed: returning
            // the lazy sequence would defer the actual file reads until after the
            // underlying stream has already been closed.
            return csv.GetRecords<CSVRowObj>().ToList();
        }
    }

    /// <summary>
    /// Writes to <paramref name="dest"/> every distinct row found in
    /// <paramref name="oldsource"/> and <paramref name="newsource"/>, keeping the first
    /// occurrence of each row (rows of the old file come first).
    /// </summary>
    /// <param name="oldsource">Path of the first CSV file.</param>
    /// <param name="newsource">Path of the second CSV file.</param>
    /// <param name="dest">Path of the CSV file to create or overwrite.</param>
    public static void WriteUnique(string oldsource, string newsource, string dest)
    {
        // Validate paths...
        ValidateSource(oldsource);
        ValidateSource(newsource);
        ValidateDestination(dest);

        // NOTE: See difference between System.IO.StreamReader and System.IO.TextReader here (either one is fine for this program):
        // https://stackoverflow.com/questions/6669885/difference-between-streamwriter-reader-and-stringwriter-readerll?rq=1
        using (var oldReader = new StreamReader(oldsource))
        using (var newReader = new StreamReader(newsource))
        using (var oldCsv = new CsvReader(oldReader))
        using (var newCsv = new CsvReader(newReader))
        using (var outputWriter = new StreamWriter(dest))
        using (var csvOut = new CsvWriter(outputWriter))
        {
            // Union with the field-wise comparer yields each distinct row once, in
            // first-seen order. Both inputs are streamed while the readers are still
            // open; only the set of distinct rows is held in memory.
            var distinctRecords = oldCsv.GetRecords<CSVRowObj>()
                .Union(newCsv.GetRecords<CSVRowObj>(), new CSVRowObj.Comparer());

            csvOut.WriteRecords(distinctRecords);
        }
    }

    /// <summary>
    /// Inner-joins the rows of <paramref name="sourcea"/> and <paramref name="sourceb"/>
    /// on equal Type and Reference values and writes the result to <paramref name="dest"/>.
    /// </summary>
    /// <remarks>
    /// Every output row takes its values from the left row, except Description2, which is
    /// the left Description2 followed by the right Description2.
    /// </remarks>
    /// <param name="sourcea">Path of the left CSV file.</param>
    /// <param name="sourceb">Path of the right CSV file.</param>
    /// <param name="dest">Path of the CSV file to create or overwrite.</param>
    public static void JoinOnTypeandReference(string sourcea, string sourceb, string dest)
    {
        // Validate Output Destination
        ValidateDestination(dest);

        // Get Records from both csv files (both are fully loaded into memory)
        var leftRecords = GetRecords(sourcea);
        var rightRecords = GetRecords(sourceb);

        // Join right records to left records if Type and Reference values are the same
        var matches = from left in leftRecords
                      join right in rightRecords
                          on new { left.Type, left.Reference }
                          equals new { right.Type, right.Reference }
                      select new CSVRowObj
                      {
                          Date = left.Date,
                          Amount = left.Amount,
                          Description = left.Description,
                          Description2 = left.Description2 + right.Description2,
                          Reference = left.Reference,
                          Type = left.Type,
                      };

        // Disposing the writers flushes buffered output; without it the destination
        // file can end up empty or truncated.
        using (var outputWriter = new StreamWriter(dest))
        using (var csvOut = new CsvWriter(outputWriter))
        {
            csvOut.WriteRecords(matches);
        }
    }
}
/// <summary>
/// One row of the CSV files handled by this program. Every column is kept as the raw
/// string read from the file.
/// </summary>
class CSVRowObj
{
    public string Date { get; set; }
    public string Amount { get; set; }
    public string Description { get; set; }
    public string Description2 { get; set; }
    public string Reference { get; set; }
    public string Type { get; set; }

    /// <summary>
    /// Compares two rows field by field: rows are equal when all six columns are equal.
    /// </summary>
    // From: https://stackoverflow.com/questions/16197290/checking-for-duplicates-in-a-list-of-objects-c-sharp
    public class Comparer : IEqualityComparer<CSVRowObj>
    {
        /// <summary>
        /// Returns true when both arguments are the same instance (including both null)
        /// or when every column of <paramref name="x"/> equals the matching column of
        /// <paramref name="y"/>.
        /// </summary>
        public bool Equals(CSVRowObj x, CSVRowObj y)
        {
            // Covers null/null as well, keeping Equals consistent with GetHashCode(null) == 0.
            if (ReferenceEquals(x, y))
            {
                return true;
            }

            if (x == null || y == null)
            {
                return false;
            }

            return x.Date == y.Date
                && x.Amount == y.Amount
                && x.Description == y.Description
                && x.Description2 == y.Description2
                && x.Reference == y.Reference
                && x.Type == y.Type;
        }

        /// <summary>
        /// Returns a hash code built from all six columns; 0 for a null row.
        /// </summary>
        public int GetHashCode(CSVRowObj obj)
        {
            if (obj == null)
            {
                return 0;
            }

            // Multiply-and-add combination instead of a plain XOR: with XOR, two columns
            // holding the same text cancel each other out and column order is ignored,
            // which produces needless collisions. Overflow is intended, hence unchecked.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + HashOf(obj.Date);
                hash = hash * 31 + HashOf(obj.Amount);
                hash = hash * 31 + HashOf(obj.Description);
                hash = hash * 31 + HashOf(obj.Description2);
                hash = hash * 31 + HashOf(obj.Reference);
                hash = hash * 31 + HashOf(obj.Type);
                return hash;
            }
        }

        // Null-safe string hash: null columns contribute 0.
        private static int HashOf(string value)
        {
            return value == null ? 0 : value.GetHashCode();
        }
    }
}
/// <summary>
/// Script entry point: joins the original April 2016 export with the newer export
/// on Type and Reference and writes the result to the output file.
/// </summary>
public void Main()
{
    const string originalCsv = @"C:\Users\pddomain\Downloads\csv\April2016.csv";
    const string updatedCsv = @"C:\Users\pddomain\Downloads\csv\April2016_New.csv";
    const string resultCsv = @"C:\Users\pddomain\Output.csv";

    // Alternative mode - write the de-duplicated union of both files instead:
    //Manipulation.WriteUnique(originalCsv, updatedCsv, resultCsv);
    Manipulation.JoinOnTypeandReference(originalCsv, updatedCsv, resultCsv);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment