Skip to content

Instantly share code, notes, and snippets.

@jlewin
Created June 27, 2013 18:23
Show Gist options
  • Save jlewin/5879001 to your computer and use it in GitHub Desktop.
Save jlewin/5879001 to your computer and use it in GitHub Desktop.
List duplicate files in a directory
/// <summary>
/// Scans the files directly inside a directory and prints every set of files
/// whose content hashes are identical.
/// </summary>
/// <remarks>
/// Only the files returned by <c>Directory.GetFiles(directory)</c> are examined.
/// Files are considered duplicates when <c>FileHash.CalculateFromFile</c> returns
/// the same value for them. Each set is written to the console as a header line
/// containing the shared hash, followed by one line per file name.
/// </remarks>
/// <param name="directory">The path of the directory to scan</param>
private void ListDuplicateFiles(string directory)
{
    // Bucket the file paths by content hash and keep only buckets holding more than one file
    var duplicateSets =
        from filePath in Directory.GetFiles(directory)
        group filePath by FileHash.CalculateFromFile(filePath) into sameContent
        where sameContent.Count() > 1
        select sameContent;

    foreach (var sameContent in duplicateSets)
    {
        // Header line carries the hash shared by every file in this set
        Console.WriteLine("--- Dupe ({0})-------------------------------", sameContent.Key);

        foreach (string filePath in sameContent)
        {
            // Print just the file name; every file lives in the same directory
            Console.WriteLine("\t {0}", Path.GetFileName(filePath));
        }
    }
}
/// <summary>
/// Helper for computing a content hash of a file on disk.
/// </summary>
public static class FileHash
{
    /// <summary>
    /// Generate an MD5 hash of the given file
    /// </summary>
    /// <remarks>
    /// The file is hashed from a stream rather than being read into a single byte
    /// array, so memory use does not grow with the file size. Both the stream and
    /// the hash algorithm are disposed before the method returns. MD5 is used here
    /// only as a content fingerprint for comparing files.
    /// </remarks>
    /// <param name="filename">The content file</param>
    /// <returns>The generated hash, encoded as a Base64 string</returns>
    public static string CalculateFromFile(string filename)
    {
        // Open read-only and let the hash algorithm pull the content through the stream
        using (FileStream stream = File.OpenRead(filename))
        using (MD5 md5 = MD5.Create())
        {
            byte[] digest = md5.ComputeHash(stream);

            // Base64 keeps the 16-byte digest compact and usable as a grouping key
            return Convert.ToBase64String(digest);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment