Skip to content

Instantly share code, notes, and snippets.

@nkundu
Last active July 24, 2017 18:44
Show Gist options
  • Save nkundu/9fe0da753593b1401129693d966df381 to your computer and use it in GitHub Desktop.
Save nkundu/9fe0da753593b1401129693d966df381 to your computer and use it in GitHub Desktop.
Split a flat file into fixed-size chunks, repeating the header line in each output file
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SplitFlatFile {
    /// <summary>
    /// Command-line tool that splits a flat text file into numbered part files of at most N data
    /// lines each, repeating the input file's first (header) line at the top of every part.
    /// </summary>
    class Program {
        /// <summary>
        /// Entry point. Expects two arguments: the input file path and the number of data lines per part.
        /// Part files ("name_01.ext", "name_02.ext", ...) and a "name_log.csv" line-count log are written
        /// into the input file's directory. On any failure the usage text and the error message are
        /// printed and the process exit code is set to 1.
        /// </summary>
        /// <param name="args">Command-line arguments: <c>filename N</c>.</param>
        static void Main(string[] args) {
            try {
                if (args == null || args.Length < 2) {
                    throw new ArgumentException("Specify an input file and a number of lines");
                }

                var fileInfo = new FileInfo(args[0]);
                if (!fileInfo.Exists) {
                    throw new FileNotFoundException("File does not exist", fileInfo.FullName);
                }

                int linesPerFile = ParseLinesPerFile(args[1]);

                // Read the input exactly once; the header is materialized so that writing each part
                // does not reopen the source file.
                var allLines = File.ReadLines(fileInfo.FullName).ToList();
                var header = allLines.Take(1).ToList();
                var lines = allLines.Skip(1);

                string directory = fileInfo.DirectoryName;
                string baseName = Path.GetFileNameWithoutExtension(fileInfo.Name);

                var log = new List<string>();
                // The source count excludes the header line.
                log.Add(string.Format("Source,{0}", Math.Max(0, allLines.Count - 1)));

                int count = 1;
                foreach (var chunk in Chunk(lines, linesPerFile)) {
                    var filename = Path.Combine(directory, string.Format("{0}_{1:D2}{2}", baseName, count, fileInfo.Extension));
                    // Header and data go out in a single write instead of write-then-append.
                    File.WriteAllLines(filename, header.Concat(chunk));
                    // Read the part back so the logged count reflects what is actually on disk
                    // (this count includes the header line).
                    log.Add(string.Format("{0},{1}", filename, File.ReadLines(filename).Count()));
                    count++;
                }

                File.WriteAllLines(Path.Combine(directory, string.Format("{0}_log.csv", baseName)), log);
            } catch (Exception ex) {
                // Top-level handler for a console tool: report the problem and signal failure to the caller.
                PrintUsage();
                Console.WriteLine("Error");
                Console.WriteLine(ex.Message);
                Environment.ExitCode = 1;
            }
        }

        /// <summary>
        /// Parses the lines-per-file argument. Accepts the same numeric spellings as before
        /// (anything <see cref="double.TryParse(string, out double)"/> understands, e.g. "100" or "1e3")
        /// but requires a whole number between 1 and <see cref="int.MaxValue"/>.
        /// </summary>
        /// <param name="text">Raw command-line argument.</param>
        /// <returns>The number of data lines to place in each part file.</returns>
        /// <exception cref="ArgumentException">The text is not a positive whole number.</exception>
        private static int ParseLinesPerFile(string text) {
            double parsed;
            // NaN fails the Floor comparison and infinities fail the range check, so both are rejected.
            // Zero and negative values must be rejected: a chunk size below 1 can never consume the input.
            if (!double.TryParse(text, out parsed)
                || parsed < 1
                || parsed > int.MaxValue
                || parsed != Math.Floor(parsed)) {
                throw new ArgumentException("Specify a valid number of lines");
            }
            return (int)parsed;
        }

        /// <summary>
        /// Prints the command-line usage line to standard output.
        /// </summary>
        private static void PrintUsage() {
            Console.WriteLine("Usage: SplitFlatFile filename N");
        }

        /// <summary>
        /// Breaks a sequence into consecutive chunks of at most <paramref name="chunksize"/> items.
        /// The source is enumerated once, front to back; every chunk except possibly the last holds
        /// exactly <paramref name="chunksize"/> items, and an empty source yields no chunks.
        /// </summary>
        /// <param name="source">Items to split.</param>
        /// <param name="chunksize">Maximum number of items per chunk; must be greater than zero.</param>
        /// <returns>A lazily produced sequence of chunks, each backed by its own list.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunksize"/> is less than 1.</exception>
        public static IEnumerable<IEnumerable<T>> Chunk<T>(IEnumerable<T> source, int chunksize) {
            // Validation lives outside the iterator so it runs at call time, not on first enumeration.
            if (source == null) {
                throw new ArgumentNullException("source");
            }
            if (chunksize <= 0) {
                throw new ArgumentOutOfRangeException("chunksize", chunksize, "Chunk size must be greater than zero");
            }
            return ChunkIterator(source, chunksize);
        }

        /// <summary>
        /// Iterator behind <see cref="Chunk{T}"/>: buffers items and yields each buffer once it is full.
        /// </summary>
        private static IEnumerable<IEnumerable<T>> ChunkIterator<T>(IEnumerable<T> source, int chunksize) {
            // Not pre-sized: chunksize may be far larger than the number of items available.
            var buffer = new List<T>();
            foreach (var item in source) {
                buffer.Add(item);
                if (buffer.Count == chunksize) {
                    yield return buffer;
                    buffer = new List<T>();
                }
            }
            // Trailing partial chunk, if any.
            if (buffer.Count > 0) {
                yield return buffer;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment