Last active
July 26, 2017 12:21
-
-
Save ykon/42917f5fed299db20c811c3020498598 to your computer and use it in GitHub Desktop.
Find similar file names, group them, display size in descending order.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) 2017 Yuki Ono
 * Licensed under the MIT License.
 */
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
using System.IO; | |
using Microsoft.VisualBasic.FileIO; | |
namespace GetFileSizeGroup { | |
class Program { | |
// https://discuss.leetcode.com/topic/17639/20ms-detailed-explained-c-solutions-o-n-space
/// <summary>
/// Computes the edit distance between two strings, counting each
/// single-character insertion, deletion or substitution as one step.
/// Only one row of <c>w1.Length + 1</c> cells is kept in memory.
/// </summary>
/// <param name="w1">First string; its characters index the cells of the row.</param>
/// <param name="w2">Second string; the row is updated once per character.</param>
/// <returns>The number of single-character edits separating the two strings.</returns>
static int MinDistance(string w1, string w2) {
    int rowLength = w1.Length;
    int passes = w2.Length;

    // dist[i] starts as the cost of turning the first i characters of w1
    // into the empty string, i.e. i deletions.
    var dist = new int[rowLength + 1];
    for (int k = 0; k <= rowLength; k++) {
        dist[k] = k;
    }

    for (int j = 1; j <= passes; j++) {
        // 'diagonal' carries the value the previous pass stored one cell to
        // the left, which would otherwise be overwritten before it is needed.
        int diagonal = dist[0];
        dist[0] = j;
        char target = w2[j - 1];

        for (int i = 1; i <= rowLength; i++) {
            int above = dist[i];
            if (w1[i - 1] == target) {
                // Matching characters cost nothing extra.
                dist[i] = diagonal;
            } else {
                // Cheapest of substitute (diagonal), delete/insert (above, left), plus one.
                int cheapestNeighbour = Math.Min(above, dist[i - 1]);
                dist[i] = Math.Min(diagonal, cheapestNeighbour) + 1;
            }
            diagonal = above;
        }
    }

    return dist[rowLength];
}
/// <summary>
/// Lazily reads the data records of a comma-separated file, such as the
/// list written by PowerShell's <c>Export-Csv</c>.
/// </summary>
/// <remarks>
/// Lines beginning with <c>#</c> are configured as comments. The first
/// non-comment record is treated as the column header and discarded; every
/// following record is yielded as an array of its field values.
/// </remarks>
/// <param name="csvPath">Path of the CSV file to read.</param>
/// <returns>One string array per data record, in file order.</returns>
static IEnumerable<string[]> ReadCSV(string csvPath) {
    using (var parser = new TextFieldParser(csvPath)) {
        parser.CommentTokens = new string[] { "#" };
        parser.SetDelimiters(new string[] { "," });
        parser.HasFieldsEnclosedInQuotes = true;

        // Discard the header record only. ReadFields honours CommentTokens
        // (ReadLine does not), so a leading "#TYPE ..." line is skipped when
        // it is present and no data row is lost when it is absent, as is the
        // case for Export-Csv output from PowerShell 6 and later.
        if (!parser.EndOfData) {
            parser.ReadFields();
        }

        while (!parser.EndOfData) {
            yield return parser.ReadFields();
        }
    }
}
/// <summary>
/// Formats a byte count as a whole number of megabytes (1024 * 1024 bytes,
/// fractional part discarded) followed by the suffix "MB".
/// </summary>
/// <param name="size">Size in bytes.</param>
/// <returns>Text such as <c>"12MB"</c>.</returns>
static string GetSizeMB(long size) {
    long kilobytes = size / 1024;
    long megabytes = kilobytes / 1024;
    return $"{megabytes}MB";
}
/// <summary>
/// Adds up the size component (second tuple item) of every entry in a group.
/// </summary>
/// <param name="fileList">Entries as (path, size) pairs.</param>
/// <returns>The sum of all sizes; 0 for an empty list.</returns>
static long GetGroupSize(List<(string, long)> fileList) {
    return fileList.Sum(entry => entry.Item2);
}
/// <summary>Matches the separators '_' and '-' that are replaced by a space before names are compared.</summary>
static readonly Regex SPACE_REG = new Regex(@"[_-]", RegexOptions.Compiled);

// without file extension
/// <summary>
/// Builds the normalised form of a file name used for similarity
/// comparison: directory and extension are removed, the rest is
/// lower-cased, and every '_' or '-' becomes a space.
/// </summary>
/// <param name="path">File path or bare file name.</param>
/// <returns>The normalised name, e.g. <c>"dir/My_File-Name.TXT"</c> gives <c>"my file name"</c>.</returns>
static string GetCompName(string path) {
    var name = Path.GetFileNameWithoutExtension(path);
    // Invariant casing keeps the result independent of the machine's locale;
    // culture-sensitive ToLower() maps 'I' to dotless 'ı' under Turkish
    // cultures, which would change the distances between names.
    return SPACE_REG.Replace(name.ToLowerInvariant(), " ");
}
// https://stackoverflow.com/questions/381366/is-there-a-wildcard-expansion-option-for-net-apps
/// <summary>
/// Expands a path whose final segment may contain wildcards into the files
/// that match it. The directory part is used as the search directory
/// ("./" when the path has none) and the file-name part as the search pattern.
/// </summary>
/// <param name="path">A file path or wildcard pattern such as <c>logs/*.csv</c>.</param>
/// <returns>The matching file paths as returned by <see cref="Directory.GetFiles(string, string)"/>.</returns>
static string[] ExpandPath(string path) {
    string folder = Path.GetDirectoryName(path);
    if (String.IsNullOrEmpty(folder)) {
        folder = "./";
    }

    string pattern = Path.GetFileName(path);
    return Directory.GetFiles(folder, pattern);
}
/// <summary>
/// Expands every command-line argument with <c>ExpandPath</c> and keeps
/// only the resulting paths that end in ".csv".
/// </summary>
/// <param name="args">File paths or wildcard patterns.</param>
/// <returns>All matching CSV file paths, in argument order.</returns>
static string[] GetCSVFiles(string[] args) {
    // Ordinal, case-insensitive comparison: the suffix check must not vary
    // with the current culture, and a file such as LIST.CSV matched by the
    // wildcard on a case-insensitive file system must not be dropped.
    return args.SelectMany(ExpandPath)
        .Where(p => p.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
        .ToArray();
}
/// <summary>
/// Decides whether two CSV records refer to similarly named files. The
/// first field of each record is normalised with <c>GetCompName</c> and the
/// two results are compared with <c>MinDistance</c>.
/// </summary>
/// <param name="x">First record; element 0 is the file path.</param>
/// <param name="y">Second record; element 0 is the file path.</param>
/// <returns><c>true</c> when the edit distance between the names is below 5.</returns>
static bool CompareName(string[] x, string[] y) {
    // Names whose distance reaches this value are considered unrelated.
    const int distanceLimit = 5;

    string left = GetCompName(x[0]);
    string right = GetCompName(y[0]);
    int distance = MinDistance(left, right);
    return distance < distanceLimit;
}
/// <summary>
/// Entry point. Reads the (path, size) records from every CSV file named on
/// the command line, groups records whose file names are similar, and
/// prints the groups in descending order of total size.
/// </summary>
/// <param name="args">CSV file paths or wildcard patterns.</param>
static void Main(string[] args) {
    var csvFiles = GetCSVFiles(args);
    if (csvFiles.Length == 0) {
        Console.WriteLine("Usage:");
        Console.WriteLine(@"Get-ChildItem C:\ -rec | where {!$_.PSIsContainer} | select-object FullName, Length | export-csv -encoding utf8 -path C:\file-list.csv");
        Console.WriteLine("GetFileSizeGroup *.csv > fsg-list.txt");
        return;
    }

    // Converts a CSV record into (path, size). The size is machine-written
    // data, so it is parsed with the invariant culture.
    (string, long) makeTuple(string[] fields) =>
        (fields[0], long.Parse(
            fields[1],
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture));

    var fileList = new LinkedList<string[]>(csvFiles.SelectMany(fn => ReadCSV(fn)));
    var fileGroups = new List<(List<(string, long)>, long)>();

    // Repeatedly take the first remaining record as the seed of a new group
    // and pull every remaining record with a similar name into that group.
    while (fileList.Count != 0) {
        var x = fileList.First.Value;
        fileList.RemoveFirst();
        var group = new List<(string, long)> { makeTuple(x) };

        // The comparisons run in parallel; AsOrdered keeps the matches in
        // input order so the printed listing is the same on every run. The
        // result is materialised before the list is modified below.
        var matches = fileList.AsParallel()
            .AsOrdered()
            .Where(y => CompareName(x, y))
            .ToArray();
        foreach (var f in matches) {
            group.Add(makeTuple(f));
            fileList.Remove(f);
        }

        fileGroups.Add((group, GetGroupSize(group)));
    }

    var ranked = fileGroups
        .OrderByDescending(fg => fg.Item2)
        .Select((f, i) => (f.Item1, f.Item2, i));
    foreach (var (fileGroup, groupSize, index) in ranked) {
        Console.WriteLine($"{index + 1}: {GetSizeMB(groupSize)}");
        foreach (var fg in fileGroup) {
            Console.WriteLine($"{fg.Item1}: {fg.Item2}");
        }
        Console.WriteLine();
        Console.WriteLine();
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment