Created
March 19, 2022 08:52
-
-
Save Psykar/0f4f74cf0216cf70a5218d984e72719e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Package dupes is a small, quick-and-dirty tool that finds duplicate files and merges them back together again.
// It was used after a reinstall of Google Drive sync caused a bunch of duplicate files to be created on Google Drive,
// which then had to be cleaned up to reclaim some space, as there was no way to do this automatically.
package dupes | |
import ( | |
"crypto/sha256" | |
"errors" | |
"fmt" | |
"io" | |
"io/fs" | |
"os" | |
"path" | |
"path/filepath" | |
"regexp" | |
"strings" | |
) | |
func Dupes(dir string, limit int) error { | |
count := 0 | |
pattern := `(.*) \([0-9]\)(\.[a-z][a-z]?[a-z]?[a-z]?)?` | |
m, err := regexp.Compile(pattern) | |
if err != nil { | |
return err | |
} | |
mover := dupeMover{ | |
os: realMover{}, | |
} | |
err = filepath.WalkDir(dir, func(pathn string, d fs.DirEntry, err error) error { | |
dir, basename := path.Split(pathn) | |
match := m.FindStringSubmatch(basename) | |
if len(match) == 0 { | |
return nil | |
} | |
count++ | |
// Find the original | |
origBasename := path.Join(dir, strings.Join(match[1:], "")) | |
origStat, err := os.Stat(origBasename) | |
if err != nil { | |
mover.noorigs = append(mover.noorigs, pathn) | |
// fmt.Println("error trying to find orig") | |
// fmt.Println(pathn) | |
// fmt.Println(origBasename) | |
return nil | |
} | |
stat, err := os.Stat(pathn) | |
if err != nil { | |
return err | |
} | |
if origStat.IsDir() != stat.IsDir() { | |
return errors.New("stats don't match") | |
} | |
if origStat.IsDir() { | |
if err := mover.mergeDirectory(pathn, origBasename); err != nil { | |
return err | |
} | |
} else { | |
if err := mover.deleteIfEqual(pathn, origBasename); err != nil { | |
return err | |
} | |
} | |
if count > limit { | |
return errors.New(fmt.Sprint(limit, " files exceeded")) | |
} | |
return nil | |
}) | |
fmt.Println("====") | |
fmt.Println("mismatches\n", strings.Join(mover.mismatches, "\n")) | |
fmt.Println("====") | |
fmt.Println("no origs:\n", strings.Join(mover.noorigs, "\n")) | |
return err | |
} | |
// Mover is the set of filesystem mutations used while merging duplicates.
// Routing them through an interface lets the merge logic run against either
// an implementation that touches the disk or one that only logs.
type Mover interface {
	// Rename moves the entry at source to dest.
	Rename(source, dest string) error
	// Remove deletes the entry at name.
	Remove(name string) error
	// RemoveAll deletes the directory at name.
	RemoveAll(name string) error
}
type dupeMover struct { | |
os Mover | |
mismatches []string | |
noorigs []string | |
} | |
func (m dupeMover) mergeDirectory(source, dest string) error { | |
// Move files in the path into orig | |
files, err := os.ReadDir(source) | |
if err != nil { | |
return err | |
} | |
for _, f := range files { | |
if f.IsDir() { | |
if err := m.mergeDirectory( | |
path.Join(source, f.Name()), | |
path.Join(dest, f.Name())); err != nil { | |
return err | |
} | |
} else { | |
if err := m.deleteIfEqual( | |
path.Join(source, f.Name()), | |
path.Join(dest, f.Name()), | |
); err != nil { | |
return err | |
} | |
} | |
} | |
return m.os.RemoveAll(source) | |
} | |
func (m *dupeMover) deleteIfEqual(source, dest string) error { | |
if _, err := os.Stat(source); err != nil { | |
return err | |
} | |
if _, err := os.Stat(dest); err != nil { | |
// move it instead | |
return m.os.Rename(source, dest) | |
} | |
shash := hashFile(source) | |
dhash := hashFile(dest) | |
if shash != dhash { | |
m.mismatches = append(m.mismatches, dest) | |
// fmt.Println("Don't match!!!!") | |
// fmt.Println(shash) | |
// fmt.Println(dhash) | |
// fmt.Println(source) | |
// fmt.Println(dest) | |
return nil | |
} | |
return m.os.Remove(source) | |
} | |
// hashFile returns the lower-case hexadecimal SHA-256 digest of the contents
// of the file at filename. It panics if the file cannot be opened or read.
func hashFile(filename string) string {
	file, openErr := os.Open(filename)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	// Stream the file through the hasher instead of loading it into memory.
	digest := sha256.New()
	_, copyErr := io.Copy(digest, file)
	if copyErr != nil {
		panic(copyErr)
	}

	sum := digest.Sum(nil)
	return fmt.Sprintf("%x", sum)
}
// fakeMover is a dry-run Mover: each method prints the operation it was asked
// to perform and reports success without touching the filesystem.
type fakeMover struct{}

// Remove logs the path that would be removed.
func (fakeMover) Remove(s string) error {
	fmt.Println("removed:: ", s)
	return nil
}

// RemoveAll logs the directory that would be removed.
func (fakeMover) RemoveAll(s string) error {
	fmt.Println("removed directory:: ", s)
	return nil
}

// Rename logs the move that would be performed.
func (fakeMover) Rename(source, dest string) error {
	fmt.Println("renamed:: ", source, " :=>: ", dest)
	return nil
}
// realMover is the Mover that actually modifies the filesystem. Every method
// prints the operation before attempting it.
type realMover struct{}

// Remove deletes the entry at s with os.Remove.
func (realMover) Remove(s string) error {
	fmt.Println("removed:: ", s)
	return os.Remove(s)
}

// RemoveAll deletes the directory s, but only if it is empty. A directory
// that still has entries is reported as an error and left untouched.
func (realMover) RemoveAll(s string) error {
	fmt.Println("removed directory:: ", s)
	entries, err := os.ReadDir(s)
	if err != nil {
		return err
	}
	if len(entries) > 0 {
		return errors.New("tried to remove non empty dir")
	}
	// os.Remove, unlike os.RemoveAll, refuses to delete a non-empty
	// directory, so anything created after the check above is not destroyed.
	return os.Remove(s)
}

// Rename moves source to dest with os.Rename.
func (realMover) Rename(source, dest string) error {
	fmt.Println("renamed:: ", source, " :=>: ", dest)
	return os.Rename(source, dest)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment