Last active
August 29, 2015 14:28
-
-
Save khellang/3a1a49744f6825b6f401 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.IO | |
open System.Security.Cryptography | |
/// Parsed command-line arguments for the duplicate finder.
type Arguments =
    {
        /// Minimum file size, in bytes, for a file to be considered.
        ByteCount: int64
        /// Root directory that is searched for duplicates.
        Path: string
    }
/// Partial active pattern: matches when `str` parses as a 64-bit integer
/// (via `Int64.TryParse`) and yields the parsed value.
let (|Long|_|) str =
    let parsed, value = Int64.TryParse(str)
    if parsed then Some value else None
/// Lazily enumerates every file under `path`: first the files directly
/// inside `path`, then, for each sub-directory in turn, the files found
/// by recursing into it.
let rec getFiles path : seq<string> =
    seq {
        for file in Directory.EnumerateFiles(path) do
            yield file

        yield! Directory.EnumerateDirectories(path) |> Seq.collect getFiles
    }
/// Computes the MD5 digest of the file's contents and returns it as a
/// lowercase hexadecimal string (two characters per byte).
let getHash (file: FileInfo) =
    use md5 = MD5.Create()
    use stream = file.OpenRead()
    let digest = md5.ComputeHash(stream)
    let hexPairs = digest |> Array.map (fun b -> b.ToString("x2"))
    String.Join(String.Empty, hexPairs)
/// Program entry point.
///
/// Usage: DuplicateFileFinder <byteCount> <path>
///
/// Searches `path` recursively for files of at least `byteCount` bytes and
/// prints every group of files that share the same MD5 hash.
///
/// Exit codes: 0 on success, 1 when the arguments are malformed, 2 when the
/// directory does not exist.
[<EntryPoint>]
let main argv =
    let args =
        match argv with
        | [| Long byteCount; path |] -> { ByteCount = byteCount; Path = path }
        | _ ->
            eprintfn "Invalid Arguments - Usage: DuplicateFileFinder <byteCount> <path>"
            exit 1

    let directory = DirectoryInfo(args.Path)

    if not directory.Exists then
        eprintfn "Invalid Argument: Directory '%s' does not exist!" args.Path
        exit 2

    printfn "Searching for duplicates in '%s'...%s" directory.FullName Environment.NewLine

    let fileGroups =
        getFiles args.Path
        |> Seq.map (fun path -> FileInfo(path))
        |> Seq.filter (fun file -> file.Length >= args.ByteCount)
        // Files can only be identical when their sizes match, so bucket by
        // size first and skip the (expensive) hashing of any file whose size
        // is unique. Relative order inside each bucket is preserved.
        |> Seq.groupBy (fun file -> file.Length)
        |> Seq.collect (fun (_, sameSize) ->
            match List.ofSeq sameSize with
            | []
            | [ _ ] -> []
            | candidates -> candidates)
        |> Seq.map (fun file -> getHash file, file.FullName)
        |> Seq.groupBy fst
        // Materialise each group once so it is not re-enumerated when it is
        // counted and then printed.
        |> Seq.map (fun (hash, entries) -> hash, entries |> Seq.map snd |> List.ofSeq)
        |> Map.ofSeq

    for group in fileGroups do
        match group.Value with
        | _ :: _ :: _ as duplicates ->
            printfn "Duplicates with hash '%s' found:" group.Key

            for duplicate in duplicates do
                printfn " - %s" duplicate

            printfn ""
        | _ -> ()

    // Return the exit code instead of terminating the process explicitly.
    0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment