Skip to content

Instantly share code, notes, and snippets.

@EgorBo
Created March 16, 2018 09:30
Show Gist options
  • Save EgorBo/bc20111c9f7514f6ba584e1d91da60a6 to your computer and use it in GitHub Desktop.
Save EgorBo/bc20111c9f7514f6ba584e1d91da60a6 to your computer and use it in GitHub Desktop.
FindDups.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
namespace ConsoleApp13
{
/// <summary>
/// Console tool that scans <see cref="RepoDir"/> for <c>*.cs</c> files sharing the same
/// file name (case-insensitively) and reports those whose contents are identical or
/// nearly identical according to the external diff tool at <see cref="DiffTool"/>.
/// </summary>
class Program
{
    const string DiffTool = @"C:\Program Files\Git\usr\bin\diff.exe";
    const string RepoDir = @"C:\prj\corefx\src";

    /// <summary>Suffix appended to a reported path when its pair produced an empty diff.</summary>
    const string IdenticalMarker = " [!]";

    /// <summary>Pairs whose ratio returned by <see cref="Diff"/> is below this value are reported.</summary>
    const double DiffThreshold = 0.3;

    /// <summary>Number of same-name groups processed so far; printed as a crude progress indicator.</summary>
    private static int _progress;

    /// <summary>
    /// Groups every <c>*.cs</c> file under <see cref="RepoDir"/> by file name, compares the
    /// files of each group pairwise and prints the groups that contain likely duplicates.
    /// Groups in which every reported file is byte-for-byte "diff-equal" are listed first.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var duplicateGroups = Directory.GetFiles(RepoDir, "*.cs", SearchOption.AllDirectories)
            // Ordinal, case-insensitive grouping instead of a culture-sensitive ToLower().
            .GroupBy(path => Path.GetFileName(path), StringComparer.OrdinalIgnoreCase)
            // Filter before sorting so single-file groups are never ordered.
            .Where(group => group.Count() > 1)
            .OrderByDescending(group => group.Count())
            // A file can be yielded once per matching pair, hence the Distinct().
            .Select(group => FindDupsFromList(group.ToArray()).Distinct().ToArray())
            .Where(files => files.Length >= 2)
            .ToArray();

        // OrderByDescending is stable, so within each bucket the "largest group first" order is kept.
        var ordered = duplicateGroups.OrderByDescending(
            files => files.All(f => f.EndsWith(IdenticalMarker, StringComparison.Ordinal)));

        foreach (var files in ordered)
        {
            // Reported paths may carry the marker; strip it so the header is a clean file name.
            Console.WriteLine($"\n{Path.GetFileName(StripMarker(files[0]))}:");
            foreach (var file in files)
            {
                Console.WriteLine($" - {file}");
            }
        }

        Console.WriteLine("Done!");

        // Console.ReadKey throws InvalidOperationException when stdin is redirected.
        if (!Console.IsInputRedirected)
        {
            Console.ReadKey();
        }
    }

    /// <summary>
    /// Removes a trailing <see cref="IdenticalMarker"/> from a path produced by
    /// <see cref="FindDupsFromList"/>; paths without the marker are returned unchanged.
    /// </summary>
    private static string StripMarker(string reportedPath)
    {
        return reportedPath.EndsWith(IdenticalMarker, StringComparison.Ordinal)
            ? reportedPath.Substring(0, reportedPath.Length - IdenticalMarker.Length)
            : reportedPath;
    }

    /// <summary>
    /// Compares every pair of files in <paramref name="array"/> and lazily yields both
    /// paths of each pair whose diff ratio is below <see cref="DiffThreshold"/>.
    /// </summary>
    /// <param name="array">Paths of the files to compare with one another.</param>
    /// <returns>
    /// The paths of similar files, each suffixed with <see cref="IdenticalMarker"/> when the
    /// pair's diff was empty. A path is yielded once per matching pair, so it may repeat.
    /// </returns>
    private static IEnumerable<string> FindDupsFromList(string[] array)
    {
        Console.WriteLine(_progress++);
        for (int i = 0; i < array.Length - 1; i++)
        {
            for (int j = i + 1; j < array.Length; j++)
            {
                float diff = Diff(DiffTool, array[i], array[j]);
                if (diff < DiffThreshold)
                {
                    // Diff returns exactly 0f for an empty diff, so the equality test is safe.
                    string marker = diff == 0 ? IdenticalMarker : "";
                    yield return array[i] + marker;
                    yield return array[j] + marker;
                }
            }
        }
    }

    /// <summary>
    /// Runs the external diff tool on two files (ignoring blank lines, whitespace and case)
    /// and estimates how different they are.
    /// </summary>
    /// <param name="diffTool">Path of the diff executable to launch.</param>
    /// <param name="fileA">First file; its line count is the denominator of the ratio.</param>
    /// <param name="fileB">Second file.</param>
    /// <returns>
    /// <c>0</c> when the diff output is empty; otherwise the number of lines of diff output
    /// divided by the number of lines in <paramref name="fileA"/> (positive infinity when that
    /// file has no lines). Returns positive infinity, after logging to standard error, when the
    /// tool exits with a status greater than 1, so a failed comparison is never reported as a match.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Both paths are the same (ignoring case), or the process could not be started.
    /// </exception>
    private static float Diff(string diffTool, string fileA, string fileB)
    {
        // File-system paths are compared ordinally rather than with culture rules.
        if (fileA.Equals(fileB, StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException($"Cannot diff a file against itself: \"{fileA}\".");
        }

        var startInfo = new ProcessStartInfo
        {
            FileName = diffTool,
            UseShellExecute = false,
            CreateNoWindow = true,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            Arguments = $"--ignore-blank-lines --ignore-all-space --ignore-case \"{fileA}\" \"{fileB}\"",
        };

        string result;
        string errors;
        int exitCode;

        // Dispose the process so its OS handle is released after every comparison.
        using (var process = Process.Start(startInfo))
        {
            if (process == null)
            {
                throw new InvalidOperationException($"Could not start \"{diffTool}\".");
            }

            // Drain stderr concurrently: a redirected stream that is never read can fill
            // its pipe buffer and block the child process indefinitely.
            var errorsTask = process.StandardError.ReadToEndAsync();
            result = process.StandardOutput.ReadToEnd();
            process.WaitForExit();
            errors = errorsTask.Result;
            exitCode = process.ExitCode;
        }

        // NOTE(review): relies on the GNU diff convention (0 = same, 1 = different,
        // 2 = trouble); confirm if DiffTool is pointed at a different implementation.
        if (exitCode > 1)
        {
            Console.Error.WriteLine(
                $"diff failed (exit code {exitCode}) for \"{fileA}\" vs \"{fileB}\": {errors.Trim()}");
            return float.PositiveInfinity;
        }

        if (string.IsNullOrEmpty(result) ||
            result == Environment.NewLine)
        {
            // files are 100% equal
            return 0f;
        }

        // Stream the file instead of materialising every line just to count them.
        int fileALineCount = File.ReadLines(fileA).Count();
        int diffLineCount = result.Split('\n').Length;

        // Compare fileA's length with the size of the diff; for relatively identical files
        // (with a small diff) the value should be less than 0.3.
        return diffLineCount / (float)fileALineCount;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment