Created
March 16, 2018 09:30
-
-
Save EgorBo/bc20111c9f7514f6ba584e1d91da60a6 to your computer and use it in GitHub Desktop.
FindDups.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
namespace ConsoleApp13
{
    /// <summary>
    /// Scans a source tree for *.cs files that share a file name and reports the
    /// ones whose contents are identical or nearly identical, using an external
    /// <c>diff</c> executable to measure similarity.
    /// </summary>
    class Program
    {
        const string DiffTool = @"C:\Program Files\Git\usr\bin\diff.exe";
        const string RepoDir = @"C:\prj\corefx\src";

        // Suffix appended to a reported path when its pair had no differences at all.
        const string IdenticalMarker = " [!]";

        // Two files are reported as duplicates when (diff lines / lines of first file) is below this.
        const double DiffThreshold = 0.3;

        // Number of same-name groups processed so far; printed as a crude progress indicator.
        private static int _progress;

        /// <summary>
        /// Groups every *.cs file under <see cref="RepoDir"/> by file name (case-insensitively),
        /// diffs the members of each group pairwise and prints the groups that contain
        /// duplicates. Groups consisting only of exact duplicates are printed first.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var allSources = Directory.GetFiles(RepoDir, "*.cs", SearchOption.AllDirectories)
                // Ordinal comparer instead of culture-sensitive ToLower(); the key keeps its original casing.
                .GroupBy(path => Path.GetFileName(path), StringComparer.OrdinalIgnoreCase)
                .Where(group => group.Count() > 1)
                .OrderByDescending(group => group.Count())
                .Select(group => new
                {
                    FileName = group.Key,
                    // The same path is yielded once per matching pair, hence Distinct().
                    Files = FindDupsFromList(group.ToArray()).Distinct().ToArray()
                })
                .Where(item => item.Files.Length >= 2)
                .ToArray();

            // OrderByDescending is stable, so within each half the "largest group first" order is kept.
            var ordered = allSources.OrderByDescending(
                item => item.Files.All(f => f.EndsWith(IdenticalMarker, StringComparison.Ordinal)));

            foreach (var item in ordered)
            {
                // Use the group key for the header: the entries in Files may carry the
                // " [!]" marker, which must not leak into the printed file name.
                Console.WriteLine($"\n{item.FileName}:");
                foreach (var file in item.Files)
                {
                    Console.WriteLine($" - {file}");
                }
            }

            Console.WriteLine("Done!");

            // Console.ReadKey throws InvalidOperationException when stdin is redirected.
            if (!Console.IsInputRedirected)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Compares every pair of files in <paramref name="array"/> and lazily yields both
        /// paths of each pair whose diff ratio is below <see cref="DiffThreshold"/>.
        /// Paths belonging to a pair with no differences are suffixed with " [!]".
        /// </summary>
        /// <param name="array">Paths of files that share the same file name.</param>
        /// <returns>
        /// The paths that take part in at least one matching pair. A path may be yielded
        /// more than once; callers are expected to de-duplicate.
        /// </returns>
        private static IEnumerable<string> FindDupsFromList(string[] array)
        {
            // Runs when enumeration starts, i.e. once per group.
            Console.WriteLine(_progress++);

            for (int i = 0; i < array.Length - 1; i++)
            {
                for (int j = i + 1; j < array.Length; j++)
                {
                    var diff = Diff(DiffTool, array[i], array[j]);
                    if (diff < DiffThreshold)
                    {
                        // Diff returns exactly 0f for identical files, so exact comparison is intended.
                        var suffix = diff == 0 ? IdenticalMarker : "";
                        yield return array[i] + suffix;
                        yield return array[j] + suffix;
                    }
                }
            }
        }

        /// <summary>
        /// Runs the external diff tool on two files (ignoring blank lines, whitespace and
        /// case) and returns a rough dissimilarity ratio: the number of lines of diff
        /// output divided by the number of lines in <paramref name="fileA"/>.
        /// </summary>
        /// <param name="diffTool">Full path of the diff executable.</param>
        /// <param name="fileA">First file; its line count is the denominator of the ratio.</param>
        /// <param name="fileB">Second file.</param>
        /// <returns>
        /// <c>0</c> when the tool reports no differences; otherwise a positive ratio
        /// (small values mean "nearly identical"). Returns
        /// <see cref="float.PositiveInfinity"/> when the tool fails or
        /// <paramref name="fileA"/> has no lines, so the pair is never reported as a duplicate.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Both paths refer to the same file, or the diff process could not be started.
        /// </exception>
        private static float Diff(string diffTool, string fileA, string fileB)
        {
            // Paths are identifiers, not linguistic text: compare ordinally.
            if (fileA.Equals(fileB, StringComparison.OrdinalIgnoreCase))
                throw new InvalidOperationException("Cannot diff a file against itself: " + fileA);

            var startInfo = new ProcessStartInfo
            {
                FileName = diffTool,
                UseShellExecute = false,
                CreateNoWindow = true,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                Arguments = $"--ignore-blank-lines --ignore-all-space --ignore-case \"{fileA}\" \"{fileB}\"",
            };

            string result;
            string errors;
            int exitCode;
            using (var process = Process.Start(startInfo))
            {
                if (process == null)
                    throw new InvalidOperationException("Failed to start diff tool: " + diffTool);

                // Both streams are redirected; drain stderr concurrently so the child
                // cannot block on a full stderr pipe while we are reading stdout.
                var errorTask = process.StandardError.ReadToEndAsync();
                result = process.StandardOutput.ReadToEnd();
                errors = errorTask.GetAwaiter().GetResult();
                process.WaitForExit();
                exitCode = process.ExitCode;
            }

            // GNU diff: 0 = no differences, 1 = differences found, 2 = trouble.
            // Without this check a failed run (empty stdout) would look like "100% equal".
            if (exitCode > 1)
            {
                Console.Error.WriteLine(
                    $"diff failed (exit code {exitCode}) for \"{fileA}\" and \"{fileB}\": {errors.Trim()}");
                return float.PositiveInfinity;
            }

            if (string.IsNullOrWhiteSpace(result))
            {
                // files are 100% equal
                return 0f;
            }

            var file1Len = File.ReadLines(fileA).Count();
            if (file1Len == 0)
                return float.PositiveInfinity;

            // Drop empty entries so the trailing newline of the output is not counted as a diff line.
            var diffLineCount = result
                .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Length;

            // compare fileA length with the diff size; for relatively identical files
            // (with a small diff) the value should be less than 0.3
            return diffLineCount / (float)file1Len;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment