Created
September 9, 2016 16:54
-
-
Save henkman/d6f4a4cbf482ef9a31ba29c09024da63 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "flag" | |
| "fmt" | |
| "image" | |
| _ "image/gif" | |
| _ "image/jpeg" | |
| _ "image/png" | |
| "io" | |
| "log" | |
| "os" | |
| "path/filepath" | |
| "regexp" | |
| "runtime" | |
| "sync" | |
| "github.com/jteeuwen/imghash" | |
| "gopkg.in/cheggaaa/pb.v1" | |
| ) | |
// Command-line options. init binds each one to a flag; main reads them.
var (
	// _dir is the directory that is scanned for image files (-i).
	_dir string
	// _out is the path the results are written to (-o); when it is empty the
	// results go to standard output.
	_out string

	// _distance is the Hamming-distance threshold (-d): a pair of images is
	// reported when the distance between their hashes is below this value.
	_distance uint64
	// _hashroutines is the number of goroutines that hash images
	// concurrently (-r).
	_hashroutines int
)
| func init() { | |
| flag.StringVar(&_dir, "i", "", "image directory") | |
| flag.Uint64Var(&_distance, "d", 5, "similarity hamming distance") | |
| flag.IntVar(&_hashroutines, "r", -1, "number of hash routines, if 0 all cpus are used") | |
| flag.StringVar(&_out, "o", "", "output file, if empty prints to stdout") | |
| flag.Parse() | |
| } | |
| func getHash(hf imghash.HashFunc, file string) (uint64, error) { | |
| fd, err := os.Open(file) | |
| if err != nil { | |
| return 0, err | |
| } | |
| defer fd.Close() | |
| img, _, err := image.Decode(fd) | |
| if err != nil { | |
| return 0, err | |
| } | |
| return hf(img), nil | |
| } | |
| func main() { | |
| runtime.GOMAXPROCS(runtime.NumCPU()) | |
| if _dir == "" { | |
| flag.Usage() | |
| return | |
| } | |
| var imgs []string | |
| { | |
| fd, err := os.Open(_dir) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| fis, err := fd.Readdir(-1) | |
| if err != nil { | |
| fd.Close() | |
| log.Fatal(err) | |
| } | |
| imgs = make([]string, 0, 32) | |
| reImage := regexp.MustCompile("(?i)^.*\\.(jpg|gif|png)$") | |
| for _, fi := range fis { | |
| if fi.IsDir() || !reImage.MatchString(fi.Name()) { | |
| continue | |
| } | |
| imgs = append(imgs, fi.Name()) | |
| } | |
| } | |
| fmt.Println("> found", len(imgs), "images") | |
| type ImageHash struct { | |
| Image string | |
| Hash uint64 | |
| } | |
| imghashes := make([]ImageHash, 0, len(imgs)) | |
| { | |
| imgchan := make(chan string) | |
| imghashchan := make(chan ImageHash) | |
| if _hashroutines < 0 { | |
| _hashroutines = runtime.NumCPU() | |
| } | |
| var wg sync.WaitGroup | |
| fmt.Println("> spawning", _hashroutines, "hash routines") | |
| for i := 0; i < _hashroutines; i++ { | |
| go func() { | |
| for img := range imgchan { | |
| hash, err := getHash(imghash.Average, filepath.Join(_dir, img)) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| imghashchan <- ImageHash{Image: img, Hash: hash} | |
| } | |
| wg.Done() | |
| }() | |
| wg.Add(1) | |
| } | |
| go func() { | |
| for _, img := range imgs { | |
| imgchan <- img | |
| } | |
| close(imgchan) | |
| wg.Wait() | |
| close(imghashchan) | |
| }() | |
| fmt.Println("> calculating hashes") | |
| bar := pb.New(len(imgs)) | |
| bar.ManualUpdate = true | |
| bar.Start() | |
| for imghash := range imghashchan { | |
| imghashes = append(imghashes, imghash) | |
| bar.Add(1) | |
| bar.Update() | |
| } | |
| bar.Finish() | |
| } | |
| fmt.Println("> calculating similarities") | |
| var out io.Writer | |
| if _out != "" { | |
| fd, err := os.OpenFile(_out, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0750) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| defer fd.Close() | |
| out = fd | |
| } else { | |
| out = os.Stdout | |
| } | |
| for i, ih := range imghashes { | |
| for e, oih := range imghashes { | |
| if i == e { | |
| continue | |
| } | |
| d := imghash.Distance(ih.Hash, oih.Hash) | |
| if d < _distance { | |
| fmt.Fprintln(out, ih.Image, oih.Image, "->", d) | |
| } | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment