-
-
Save larsw/5aae9ce9c01c5a731e77 to your computer and use it in GitHub Desktop.
File dedupe checker (based on @khellang's original code).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Kodekamp | |
open System | |
open System.IO | |
open System.Security.Cryptography | |
/// Parsed command-line arguments for the duplicate finder.
type Arguments =
    {
        /// Minimum file size, in bytes, for a file to be considered.
        ByteCount: int64
        /// Root directory in which to search for duplicates.
        Path: string
    }
/// Partial active pattern that matches a string which parses as a 64-bit
/// signed integer, yielding the parsed value.
let (|Long|_|) (str: string) =
    let mutable parsed = 0L
    if Int64.TryParse(str, &parsed) then Some parsed else None
/// Applies `f` to `path` and returns the items it produces, writing a warning
/// to stderr instead of propagating any exception.
///
/// The sequence returned by `f` is enumerated eagerly *inside* the `try`
/// block. Sequences may be lazy, in which case an exception raised while
/// iterating would be thrown only after this function had returned and would
/// therefore escape the handler. Items produced before a failure are kept;
/// if `f` itself throws, the result is empty.
let ignoreAndWarn f path =
    let collected = ResizeArray<_>()
    try
        // Force the enumeration here so that lazily raised exceptions are caught.
        for item in (f path : seq<_>) do
            collected.Add item
    with
    | :? System.UnauthorizedAccessException as x -> eprintfn "UNAUTHORIZED ACCESS: %A" x.Message
    | x -> eprintfn "%A" x.Message
    collected :> seq<_>
/// Lazily enumerates every file below `path`, depth-first: the files directly
/// in `path` come first, followed by the files of each subdirectory in turn.
/// Directory listings go through `ignoreAndWarn`.
let rec getFiles path =
    seq {
        for file in ignoreAndWarn Directory.EnumerateFiles path do
            yield file
        // Subdirectories are only listed once the files above are exhausted.
        yield! ignoreAndWarn Directory.EnumerateDirectories path |> Seq.collect getFiles
    }
/// Computes the MD5 digest of the contents of `file` and returns it as the
/// hyphen-separated uppercase hex string produced by `BitConverter.ToString`.
let getHash (file: FileInfo) =
    use hasher = MD5.Create()
    use input = file.OpenRead()
    let digest = hasher.ComputeHash(input)
    BitConverter.ToString(digest)
/// Program logic: parses `argv` as `<byteCount> <path>`, hashes every file
/// under `path` whose size is at least `byteCount` bytes, and prints each
/// group of files sharing the same hash.
///
/// Terminates the process via `exit`: 0 on completion, 1 for invalid
/// arguments, 2 when the directory does not exist.
let realMain argv =
    let args =
        match argv with
        | [| Long byteCount; path |] -> { ByteCount = byteCount; Path = path }
        | _ ->
            eprintfn "Invalid Arguments - Usage: DuplicateFileFinder <byteCount> <path>"
            exit 1
    let directory = DirectoryInfo(args.Path)
    if not directory.Exists then
        eprintfn "Invalid Argument: Directory '%s' does not exist!" args.Path
        exit 2
    printfn "Searching for duplicates in '%s'...%s" directory.FullName Environment.NewLine
    // Returns (hash, full path) for files that are large enough. A file that
    // cannot be inspected or read (locked, removed mid-scan, access denied,
    // path too long) is skipped with a warning instead of aborting the scan.
    let tryHash (path: string) =
        try
            let file = FileInfo(path)
            if file.Length >= args.ByteCount then Some(getHash file, file.FullName) else None
        with
        | :? UnauthorizedAccessException as x ->
            eprintfn "UNAUTHORIZED ACCESS: %A" x.Message
            None
        | :? IOException as x ->
            eprintfn "%A" x.Message
            None
    getFiles args.Path
    |> Seq.choose tryHash
    |> Seq.groupBy fst
    // Materialise each group once so it is not re-enumerated by the size
    // check and again by the printing below.
    |> Seq.map (fun (hash, entries) -> hash, entries |> Seq.map snd |> List.ofSeq)
    |> Map.ofSeq
    |> Map.filter (fun _ paths -> List.length paths > 1)
    |> Map.iter (fun hash paths ->
        // Header first, then the files that belong to it.
        printfn "Duplicates with hash '%s' found:" hash
        paths |> List.iter (fun duplicate -> printfn " - %s" duplicate)
        printfn "")
    exit 0
#if INTERACTIVE
// When loaded in F# Interactive, scan the user's personal folder for
// duplicate files of at least 100 bytes.
let personalFolder = Environment.GetFolderPath Environment.SpecialFolder.Personal
ignore (realMain [| "100"; personalFolder |])
#else
/// Compiled entry point: forwards the command-line arguments to `realMain`.
[<EntryPoint>]
let main argv = realMain argv
#endif
Note to self: https://twitter.com/bjartnes/status/636116585653080064
Maybe I need to write a new workflow builder, wrapping the seq workflow with additional exception handling.
(Or just drop the workflow altogether - but there's not much fun in that :-) )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Current problem: the ignoreAndWarn function is supposed to catch all exceptions (print a warning and return an empty sequence), but right now the code wrapped with it in getFiles still throws.
Log: https://gist.github.com/larsw/c339a6e936be3450277a