Last active
August 9, 2017 17:35
-
-
Save pldmgg/242de93928dee58ab69721d5afed0849 to your computer and use it in GitHub Desktop.
CSVManipulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// CSV helpers built on CsvHelper's <c>CsvReader</c>/<c>CsvWriter</c>: a de-duplicating merge of
/// two files and a join of two files on their Type and Reference columns.
/// </summary>
/// <remarks>
/// Path validation failures print a message to the console and terminate the process with
/// exit code 3 (ERROR_PATH_NOT_FOUND) instead of throwing.
/// </remarks>
public class Manipulation
{
    // Windows system error code ERROR_PATH_NOT_FOUND.
    // For exit codes, see: https://msdn.microsoft.com/en-us/library/ms681382(v=vs.85)
    private const int PathNotFoundExitCode = 3;

    /// <summary>
    /// Terminates the process with exit code 3 when <paramref name="source"/> is not an existing file.
    /// </summary>
    /// <param name="source">Path of the CSV file that is about to be read.</param>
    private static void ValidateSource(string source)
    {
        if (!File.Exists(source))
        {
            Console.WriteLine("Unable to find " + source + "!");
            Environment.Exit(PathNotFoundExitCode);
        }
    }

    /// <summary>
    /// Terminates the process with exit code 3 when the folder that should contain
    /// <paramref name="dest"/> does not exist.
    /// </summary>
    /// <param name="dest">Path of the CSV file that is about to be written.</param>
    private static void ValidateDestination(string dest)
    {
        string parentDir = Path.GetDirectoryName(dest);

        // An empty result means dest is a bare file name, which resolves against the current
        // directory; there is no parent folder to verify in that case.
        if (parentDir == string.Empty)
        {
            return;
        }

        if (!Directory.Exists(parentDir))
        {
            Console.WriteLine("Unable to find " + parentDir + "!");
            Environment.Exit(PathNotFoundExitCode);
        }
    }

    /// <summary>
    /// Returns the header fields that CsvHelper reports for the CSV file at <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Path of an existing CSV file.</param>
    /// <returns>The value of <c>CsvReader.FieldHeaders</c> for that file.</returns>
    private static string[] GetHeaders(string source)
    {
        ValidateSource(source);

        // using guarantees the file handle is released even if CsvHelper throws.
        using (var reader = new StreamReader(source))
        using (var csv = new CsvReader(reader))
        {
            // NOTE(review): some CsvHelper versions only populate FieldHeaders after a
            // Read()/ReadHeader() call - confirm against the version this project references.
            return csv.FieldHeaders;
        }
    }

    /// <summary>
    /// Reads every row of the CSV file at <paramref name="source"/> into memory.
    /// </summary>
    /// <param name="source">Path of an existing CSV file.</param>
    /// <returns>All rows of the file mapped to <see cref="CSVRowObj"/>, in file order.</returns>
    private static IEnumerable<CSVRowObj> GetRecords(string source)
    {
        ValidateSource(source);

        using (var reader = new StreamReader(source))
        using (var csv = new CsvReader(reader))
        {
            // GetRecords<T>() is lazy: nothing is read until the sequence is enumerated.
            // Materialize it here, while the reader is still open, so callers never end up
            // enumerating against a disposed reader.
            return csv.GetRecords<CSVRowObj>().ToList();
        }
    }

    /// <summary>
    /// Writes to <paramref name="dest"/> every distinct row found in <paramref name="oldsource"/>
    /// and <paramref name="newsource"/>. Two rows are duplicates when all of their columns match
    /// (see <c>CSVRowObj.Comparer</c>); the first occurrence is kept.
    /// </summary>
    /// <param name="oldsource">Path of the first CSV file; its rows come first in the output.</param>
    /// <param name="newsource">Path of the second CSV file.</param>
    /// <param name="dest">Path of the CSV file to create or overwrite.</param>
    public static void WriteUnique(string oldsource, string newsource, string dest)
    {
        // Validate every path up front so nothing is opened or created when an input is missing.
        ValidateSource(oldsource);
        ValidateSource(newsource);
        ValidateDestination(dest);

        // WARNING: both files are loaded into memory in full.
        IEnumerable<CSVRowObj> oldRecords = GetRecords(oldsource);
        IEnumerable<CSVRowObj> newRecords = GetRecords(newsource);

        // Union with a comparer concatenates and de-duplicates in a single pass, preserving
        // first-seen order (old rows, then new rows).
        IEnumerable<CSVRowObj> distinctRecords = oldRecords.Union(newRecords, new CSVRowObj.Comparer());

        // The destination is opened only after both inputs were read successfully, so a read
        // failure does not leave behind a truncated output file.
        using (var writer = new StreamWriter(dest))
        using (var csvOut = new CsvWriter(writer))
        {
            csvOut.WriteRecords(distinctRecords);
        }
    }

    /// <summary>
    /// Inner-joins the rows of <paramref name="sourcea"/> with the rows of <paramref name="sourceb"/>
    /// on equal Type and Reference values and writes one row per match to <paramref name="dest"/>.
    /// Each output row copies the left row, with the right row's Description2 appended to the
    /// left row's Description2.
    /// </summary>
    /// <param name="sourcea">Path of the left CSV file.</param>
    /// <param name="sourceb">Path of the right CSV file.</param>
    /// <param name="dest">Path of the CSV file to create or overwrite.</param>
    public static void JoinOnTypeandReference(string sourcea, string sourceb, string dest)
    {
        ValidateDestination(dest);

        // Both inputs are fully loaded; GetRecords validates each source path itself.
        IEnumerable<CSVRowObj> leftRecords = GetRecords(sourcea);
        IEnumerable<CSVRowObj> rightRecords = GetRecords(sourceb);

        // Join right records to left records if Type and Reference values are the same.
        var matches = from left in leftRecords
                      join right in rightRecords
                          on new { left.Type, left.Reference }
                          equals new { right.Type, right.Reference }
                      select new CSVRowObj
                      {
                          Date = left.Date,
                          Amount = left.Amount,
                          Description = left.Description,
                          Description2 = left.Description2 + right.Description2,
                          Reference = left.Reference,
                          Type = left.Type,
                      };

        // Disposing the writers flushes buffered rows to disk and releases the file handle.
        using (var writer = new StreamWriter(dest))
        using (var csvOut = new CsvWriter(writer))
        {
            csvOut.WriteRecords(matches);
        }
    }
}
/// <summary>
/// One CSV row. Every column is kept as the raw string read from the file.
/// </summary>
class CSVRowObj
{
    public string Date { get; set; }
    public string Amount { get; set; }
    public string Description { get; set; }
    public string Description2 { get; set; }
    public string Reference { get; set; }
    public string Type { get; set; }

    /// <summary>
    /// Value-based equality for <see cref="CSVRowObj"/>: two rows are equal when all six
    /// columns hold the same string (ordinal comparison).
    /// </summary>
    // From: https://stackoverflow.com/questions/16197290/checking-for-duplicates-in-a-list-of-objects-c-sharp
    public class Comparer : IEqualityComparer<CSVRowObj>
    {
        /// <summary>Returns true when both rows are the same instance, both are null, or all columns match.</summary>
        public bool Equals(CSVRowObj x, CSVRowObj y)
        {
            // Covers the same instance and null == null, keeping Equals reflexive and
            // consistent with GetHashCode(null) == 0.
            if (ReferenceEquals(x, y))
            {
                return true;
            }

            if (x == null || y == null)
            {
                return false;
            }

            return string.Equals(x.Date, y.Date, StringComparison.Ordinal)
                && string.Equals(x.Amount, y.Amount, StringComparison.Ordinal)
                && string.Equals(x.Description, y.Description, StringComparison.Ordinal)
                && string.Equals(x.Description2, y.Description2, StringComparison.Ordinal)
                && string.Equals(x.Reference, y.Reference, StringComparison.Ordinal)
                && string.Equals(x.Type, y.Type, StringComparison.Ordinal);
        }

        /// <summary>Returns a hash built from all six columns; 0 for a null row.</summary>
        public int GetHashCode(CSVRowObj obj)
        {
            if (obj == null)
            {
                return 0;
            }

            // Multiply-and-add instead of a plain XOR: with XOR, two columns holding the same
            // string cancel each other out and column order is ignored, which causes needless
            // collisions. unchecked lets the arithmetic wrap instead of throwing on overflow.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + (obj.Date == null ? 0 : obj.Date.GetHashCode());
                hash = hash * 31 + (obj.Amount == null ? 0 : obj.Amount.GetHashCode());
                hash = hash * 31 + (obj.Description == null ? 0 : obj.Description.GetHashCode());
                hash = hash * 31 + (obj.Description2 == null ? 0 : obj.Description2.GetHashCode());
                hash = hash * 31 + (obj.Reference == null ? 0 : obj.Reference.GetHashCode());
                hash = hash * 31 + (obj.Type == null ? 0 : obj.Type.GetHashCode());
                return hash;
            }
        }
    }
}
// Script entry point: runs the Type/Reference join over the two April 2016 CSV files and
// sends the result to Output.csv. All paths are hard-coded.
public void Main()
{
    string originalCsv = @"C:\Users\pddomain\Downloads\csv\April2016.csv";
    string updatedCsv = @"C:\Users\pddomain\Downloads\csv\April2016_New.csv";
    string resultCsv = @"C:\Users\pddomain\Output.csv";

    // Alternative operation, currently disabled:
    //   Manipulation.WriteUnique(originalCsv, updatedCsv, resultCsv);
    Manipulation.JoinOnTypeandReference(originalCsv, updatedCsv, resultCsv);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment