/// <summary>LINQ-style helper used by the file-signature sampling code.</summary>
static class LinqExtension
{
    /// <summary>
    /// Lazily yields every <paramref name="sample"/>-th element of <paramref name="items"/>,
    /// starting with the first one (zero-based positions 0, sample, 2 * sample, ...).
    /// </summary>
    /// <param name="items">Sequence to thin out.</param>
    /// <param name="sample">
    /// Step between yielded elements. A value of zero makes the modulo below throw
    /// <see cref="DivideByZeroException"/> once enumeration starts.
    /// </param>
    /// <returns>A deferred sequence containing the selected elements in their original order.</returns>
    public static IEnumerable<T> SampleEvery<T>(this IEnumerable<T> items, int sample)
    {
        int index = 0;
        foreach (var item in items)
        {
            if (index % sample == 0)
            {
                yield return item;
            }

            index++;
        }
    }
}

// number of samples to take
const int SampleCount = 4;

// size of each random file sample
const int SampleSize = 4 * 1024;

// files smaller than this get no random sampling
const int SamplingThreshold = 16 * 1024;

/// <summary>
/// Computes a cheap content signature for a file: an MD5 digest, returned as a
/// <see cref="Guid"/>, over a sample of the file's bytes followed by the file length.
/// </summary>
/// <remarks>
/// Files shorter than <c>SamplingThreshold</c> are hashed in full. Larger files contribute
/// <c>SampleCount</c> chunks of up to <c>SampleSize</c> bytes each, read at pseudo-random
/// offsets. The generator is seeded from the file length, so two files of equal length are
/// always sampled at the same offsets. Offsets are drawn below <see cref="int.MaxValue"/>,
/// so content beyond that point is only reached by a chunk that starts just before it.
/// Because only part of a large file is read, equal signatures do not prove equal content.
/// </remarks>
/// <param name="filename">Path of the file to sign.</param>
/// <returns>The 16-byte MD5 digest wrapped in a <see cref="Guid"/>.</returns>
/// <remarks>Exceptions raised while opening or reading the file propagate to the caller.</remarks>
public static Guid GetFileSignature(string filename)
{
    // Candidate offsets drawn per sample kept. Drawing more than needed, sorting them and
    // keeping every N-th one spaces the retained offsets out across the file.
    const int CandidatesPerSample = 4;

    byte[] buffer;
    long filesize;

    // OpenRead shares the file for reading, so computing a signature does not fail merely
    // because another reader already has the file open.
    using (var reader = File.OpenRead(filename))
    {
        filesize = reader.Length;

        if (filesize < SamplingThreshold)
        {
            // Small file: hash the entire content.
            buffer = new byte[filesize];
            Read(reader, buffer, 0, (int)filesize);
        }
        else
        {
            // Seeding from the length keeps the chosen offsets reproducible per file size.
            var random = new Random((int)(filesize % int.MaxValue));

            // Random.Next takes an int upper bound, so clamp the length to int range.
            int maxSize = (int)Math.Min(filesize, int.MaxValue);

            // space out random numbers
            var startPositions = Enumerable
                .Range(0, SampleCount * CandidatesPerSample)
                .Select(_ => random.Next(maxSize))
                .OrderBy(position => position)
                .SampleEvery(CandidatesPerSample)
                .ToArray();

            // Chunks are packed back to back. A chunk that starts near the end of the file
            // comes up short, which leaves the unused tail of the buffer zero-filled.
            buffer = new byte[SampleCount * SampleSize];
            int bufferPosition = 0;
            foreach (var start in startPositions)
            {
                reader.Seek(start, SeekOrigin.Begin);
                bufferPosition += Read(reader, buffer, bufferPosition, SampleSize);
            }
        }
    }

    using (var md5 = MD5.Create())
    {
        md5.TransformBlock(buffer, 0, buffer.Length, buffer, 0);

        // include the filesize in the hash (bytes are in the platform's native byte order)
        var fileSizeArray = BitConverter.GetBytes(filesize);
        md5.TransformFinalBlock(fileSizeArray, 0, fileSizeArray.Length);

        return new Guid(md5.Hash);
    }
}

/// <summary>
/// Reads from <paramref name="reader"/> until <paramref name="count"/> bytes have been
/// stored or the stream reports end of data, whichever happens first.
/// </summary>
/// <param name="reader">Stream to read from, starting at its current position.</param>
/// <param name="buffer">Destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> where the first byte is stored.</param>
/// <param name="count">Maximum number of bytes to read.</param>
/// <returns>The number of bytes actually stored; less than <paramref name="count"/> only at end of stream.</returns>
private static int Read(FileStream reader, byte[] buffer, int offset, int count)
{
    int totalBytesRead = 0;

    // A single Read call may return fewer bytes than requested, so keep asking for the rest.
    while (totalBytesRead < count)
    {
        int bytesRead = reader.Read(buffer, offset + totalBytesRead, count - totalBytesRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalBytesRead += bytesRead;
    }

    return totalBytesRead;
}