Skip to content

Instantly share code, notes, and snippets.

@juanplopes
Created October 25, 2012 04:28
Show Gist options
  • Save juanplopes/3950410 to your computer and use it in GitHub Desktop.
Save juanplopes/3950410 to your computer and use it in GitHub Desktop.
Naïve external sort in C#
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace DirtyMergeSort
{
public static class Program
{
public static void Main(string[] args)
{
File.WriteAllLines(@"d:\sorted.txt",
File.ReadLines(@"d:\data.txt")
.Select(Progress("read"))
.Sorted(1000000)
.Select(Progress("write")));
}
private static Func<string, int, string> Progress(string type)
{
return (x, i) =>
{
if (i % 100000 == 0) Console.WriteLine("{0}: {1}", type, i);
return x;
};
}
public static IEnumerable<string> Sorted(this IEnumerable<string> lines, int size)
{
var files = lines.Partition(size)
.Select(x=>new {File = x, Lines = File.ReadLines(x).GetEnumerator()})
.Where(x=>x.Lines.MoveNext())
.ToList();
while (files.Any())
{
var current = files.Min(x => x.Lines.Current);
yield return current;
files = files.Where(x =>
{
while (x.Lines.Current == current)
{
if (!x.Lines.MoveNext())
{
File.Delete(x.File);
Console.WriteLine("Deleted file {0}", x.File);
return false;
}
}
return true;
}).ToList();
}
}
public static IEnumerable<string> Partition(this IEnumerable<string> lines, int size)
{
return lines.Batch(size)
.AsParallel()
.WithMergeOptions(ParallelMergeOptions.NotBuffered)
.Select(batch =>
{
var temp = Path.GetTempFileName();
File.WriteAllLines(temp, batch.OrderBy(x=>x));
Console.WriteLine("Wrote file {0}", temp);
return temp;
});
}
public static IEnumerable<List<T>> Batch<T>(this IEnumerable<T> collection, int size)
{
var batch = new List<T>(size);
foreach (T item in collection)
{
batch.Add(item);
if (batch.Count == size)
{
yield return batch;
batch = new List<T>(size);
}
}
if (batch.Any())
yield return batch;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment