Last active
July 24, 2017 18:44
-
-
Save nkundu/9fe0da753593b1401129693d966df381 to your computer and use it in GitHub Desktop.
Split a flat file maintaining header
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
namespace SplitFlatFile {
    /// <summary>
    /// Console tool that splits a flat text file into numbered part files.
    /// The first line of the input is treated as the header and is repeated at the top of every part.
    /// A CSV log listing the line counts is written next to the input file.
    /// </summary>
    class Program {
        /// <summary>
        /// Entry point. Expects two arguments: the input file path and the number of data lines per part.
        /// Parts are named "{name}_{NN}{ext}" and the log "{name}_log.csv", all in the input file's directory.
        /// Any failure prints the usage text and the error message, and sets a non-zero exit code.
        /// </summary>
        /// <param name="args">Command line arguments: file name, then lines per part.</param>
        static void Main(string[] args) {
            try {
                if (args == null || args.Length < 2) {
                    throw new ArgumentException("Specify a file name and a number of lines");
                }

                string inputFile = args[0];
                var fileInfo = new FileInfo(inputFile);
                if (!fileInfo.Exists) {
                    throw new FileNotFoundException("File does not exist");
                }

                // Parsed as a double so inputs such as "10.0" or "1e3" remain accepted; the value must
                // still be a whole, positive number that fits in an int before it is cast.
                double linesDbl;
                if (!double.TryParse(args[1], out linesDbl)
                    || linesDbl < 1
                    || linesDbl > int.MaxValue
                    || linesDbl != Math.Floor(linesDbl)) {
                    throw new ArgumentException("Specify a valid number of lines");
                }
                int linesPerFile = (int)linesDbl;

                // Read the input exactly once; the header is the first line (absent for an empty file).
                string[] allLines = File.ReadAllLines(inputFile);
                string[] header = allLines.Take(1).ToArray();
                List<string> lines = allLines.Skip(1).ToList();

                string directory = fileInfo.DirectoryName;
                string baseName = Path.GetFileNameWithoutExtension(fileInfo.Name);

                var log = new List<string>();
                // The source count excludes the header line; the per-part counts below include it.
                log.Add(string.Format("Source,{0}", lines.Count));

                int count = 1;
                foreach (var chunk in Chunk(lines, linesPerFile)) {
                    var filename = Path.Combine(directory, string.Format("{0}_{1:D2}{2}", baseName, count, fileInfo.Extension));

                    // Header plus data lines are written in a single pass, so the line count is
                    // known without reading the output file back.
                    List<string> part = header.Concat(chunk).ToList();
                    File.WriteAllLines(filename, part);
                    log.Add(string.Format("{0},{1}", filename, part.Count));
                    count++;
                }

                File.WriteAllLines(Path.Combine(directory, string.Format("{0}_log.csv", baseName)), log);
            } catch (Exception ex) {
                PrintUsage();
                Console.WriteLine("Error");
                Console.WriteLine(ex.Message);
                // Let calling scripts detect the failure.
                Environment.ExitCode = 1;
            }
        }

        /// <summary>
        /// Writes the command line usage text to the console.
        /// </summary>
        private static void PrintUsage() {
            Console.WriteLine("Usage: SplitFlatFile filename N");
        }

        /// <summary>
        /// Breaks a sequence into consecutive chunks of at most <paramref name="chunksize"/> items.
        /// Every chunk is full except possibly the last; an empty source produces no chunks.
        /// The source is enumerated once, lazily, as the chunks are consumed.
        /// </summary>
        /// <param name="source">Items to split.</param>
        /// <param name="chunksize">Maximum number of items per chunk; must be at least 1.</param>
        /// <returns>The chunks, in source order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunksize"/> is less than 1.</exception>
        public static IEnumerable<IEnumerable<T>> Chunk<T>(IEnumerable<T> source, int chunksize) {
            if (source == null) {
                throw new ArgumentNullException("source");
            }
            // A size below 1 can never consume the source, so chunking would never terminate.
            if (chunksize < 1) {
                throw new ArgumentOutOfRangeException("chunksize", "Chunk size must be at least 1");
            }
            // Validation lives outside the iterator so it runs at call time rather than on first enumeration.
            return ChunkIterator(source, chunksize);
        }

        /// <summary>
        /// Single-pass iterator behind <see cref="Chunk{T}"/>; buffers one chunk at a time.
        /// </summary>
        private static IEnumerable<IEnumerable<T>> ChunkIterator<T>(IEnumerable<T> source, int chunksize) {
            // Not pre-sized: chunksize may be far larger than the number of items available.
            var bucket = new List<T>();
            foreach (var item in source) {
                bucket.Add(item);
                if (bucket.Count == chunksize) {
                    yield return bucket;
                    bucket = new List<T>();
                }
            }
            if (bucket.Count > 0) {
                yield return bucket;
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment