Skip to content

Instantly share code, notes, and snippets.

@zh4n7wm
Last active December 8, 2020 09:35
Show Gist options
  • Save zh4n7wm/c757dcdb6adb32d2f6c56bc440ac94c8 to your computer and use it in GitHub Desktop.
Save zh4n7wm/c757dcdb6adb32d2f6c56bc440ac94c8 to your computer and use it in GitHub Desktop.
remove duplicate files
// remove duplicate files
// files whose contents have the same SHA-256 digest are treated as identical
// usage: go run remove-duplicate-files.go <your-directory> [your-dir1 your-dir2 ...]
// run with `-dryrun` (placed before the directories) if you just want to find the duplicate files
package main
import (
"crypto/sha256"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
)
var (
	// dryRun is the -dryrun command-line flag. When it is set, duplicates are
	// only reported and nothing is deleted.
	dryRun = flag.Bool("dryrun", false, "dry run, do not delete files.")

	// fileHashes maps the hex-encoded SHA-256 digest of a file's contents to
	// the path of the first file that was seen with that digest.
	fileHashes = map[string]string{}
)
// main parses the command-line flags and then processes every directory given
// as a positional argument, in the order supplied.
func main() {
	flag.Parse()
	for i := 0; i < flag.NArg(); i++ {
		root := flag.Arg(i)
		fmt.Printf("process dir: %s ...\n", root)
		iterate(root)
	}
}
// iterate walks the directory tree rooted at path and de-duplicates the
// regular files it finds.
//
// Each regular file is hashed with SHA-256. The first path seen for a digest
// is recorded in fileHashes; any later file with the same digest is reported
// and, unless the -dryrun flag is set, removed. Because fileHashes is shared
// across calls, duplicates are also detected between different root
// directories.
//
// Problems with individual files (unreadable directory, open/read failure,
// failed removal) are logged and the walk carries on with the next entry.
func iterate(path string) {
	walkErr := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("walk path failed: %s", err)
			return nil
		}
		if !info.Mode().IsRegular() {
			return nil
		}

		// hashFile closes the file before returning, so it is no longer open
		// when os.Remove runs below (removing an open file fails on Windows).
		fileHash, err := hashFile(path)
		if err != nil {
			log.Printf("hash path failed: %s", err)
			return nil
		}

		existFile, seen := fileHashes[fileHash]
		if !seen {
			fileHashes[fileHash] = path
			return nil
		}

		// Never treat a file as a duplicate of itself. This happens when the
		// same directory is passed twice or the roots overlap; deleting here
		// would destroy the only copy.
		existInfo, statErr := os.Stat(existFile)
		if statErr != nil {
			// The recorded file is gone, so this one becomes the kept copy.
			fileHashes[fileHash] = path
			return nil
		}
		if os.SameFile(existInfo, info) {
			return nil
		}

		fmt.Printf("remove file, path: %s, already exist: %s\n", path, existFile)
		if *dryRun {
			return nil
		}
		if err := os.Remove(path); err != nil {
			log.Printf("remove path failed: %s", err)
		}
		return nil
	})
	if walkErr != nil {
		log.Printf("walk path failed: %s", walkErr)
	}
}

// hashFile returns the hex-encoded SHA-256 digest of the contents of the file
// at path. The file is closed before hashFile returns.
func hashFile(path string) (string, error) {
	fp, err := os.Open(path)
	if err != nil {
		return "", fmt.Errorf("open path failed: %w", err)
	}
	defer fp.Close()

	h256 := sha256.New()
	if _, err := io.Copy(h256, fp); err != nil {
		return "", fmt.Errorf("read path failed: %w", err)
	}
	return fmt.Sprintf("%x", h256.Sum(nil)), nil
}
@zh4n7wm
Copy link
Author

zh4n7wm commented Dec 8, 2020

build.sh

#!/bin/bash
# Cross-compile main.go for every supported OS/architecture pair.
#
# Targets are "os/arch" strings in a plain indexed array. An associative array
# keyed by OS can hold only one architecture per OS, so listing [windows] twice
# silently keeps only one of the two Windows builds. A plain array also keeps
# the build order deterministic and works on Bash versions older than 4,
# which lack `declare -A`.

targets=(
    darwin/amd64
    linux/amd64
    windows/386
    windows/amd64
)

for target in "${targets[@]}"; do
    os="${target%/*}"    # text before the slash
    arch="${target#*/}"  # text after the slash
    GOOS="$os" GOARCH="$arch" go build -o "main-$os-$arch" main.go
done

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment