Skip to content

Instantly share code, notes, and snippets.

@henkman
Created September 9, 2016 16:54
Show Gist options
  • Select an option

  • Save henkman/d6f4a4cbf482ef9a31ba29c09024da63 to your computer and use it in GitHub Desktop.

Select an option

Save henkman/d6f4a4cbf482ef9a31ba29c09024da63 to your computer and use it in GitHub Desktop.
package main
import (
"flag"
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"io"
"log"
"os"
"path/filepath"
"regexp"
"runtime"
"sync"
"github.com/jteeuwen/imghash"
"gopkg.in/cheggaaa/pb.v1"
)
// Command-line settings. Each one is bound to a flag in init and therefore
// holds its final value by the time main starts.
var (
	// _dir (-i) is the directory scanned for images; _out (-o) is the
	// optional file the similarity report is written to.
	_dir, _out string
	// _distance (-d) is the hamming-distance threshold below which two
	// images are reported as similar.
	_distance uint64
	// _hashroutines (-r) is the requested number of hashing goroutines.
	_hashroutines int
)
// init binds the command-line flags to the package-level settings and then
// parses the command line, so the settings are populated before main runs.
func init() {
	// flag.CommandLine is the default flag set that the package-level
	// flag.XxxVar helpers register on.
	fs := flag.CommandLine

	// Input and output locations.
	fs.StringVar(&_dir, "i", "", "image directory")
	fs.StringVar(&_out, "o", "", "output file, if empty prints to stdout")

	// Tuning knobs.
	fs.Uint64Var(&_distance, "d", 5, "similarity hamming distance")
	fs.IntVar(&_hashroutines, "r", -1, "number of hash routines, if 0 all cpus are used")

	flag.Parse()
}
// getHash opens the image stored at path file, decodes it with whichever
// decoder is registered for its format, and returns hf applied to the decoded
// image.
//
// On failure the returned hash is 0 and the error is non-nil. A decode error
// is wrapped with the file name, because the errors produced by image.Decode
// (for example "image: unknown format") do not identify the offending file.
// The error from os.Open is returned as is.
func getHash(hf imghash.HashFunc, file string) (uint64, error) {
	fd, err := os.Open(file)
	if err != nil {
		return 0, err
	}
	defer fd.Close()

	img, _, err := image.Decode(fd)
	if err != nil {
		return 0, fmt.Errorf("decode %s: %w", file, err)
	}
	return hf(img), nil
}
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
if _dir == "" {
flag.Usage()
return
}
var imgs []string
{
fd, err := os.Open(_dir)
if err != nil {
log.Fatal(err)
}
fis, err := fd.Readdir(-1)
if err != nil {
fd.Close()
log.Fatal(err)
}
imgs = make([]string, 0, 32)
reImage := regexp.MustCompile("(?i)^.*\\.(jpg|gif|png)$")
for _, fi := range fis {
if fi.IsDir() || !reImage.MatchString(fi.Name()) {
continue
}
imgs = append(imgs, fi.Name())
}
}
fmt.Println("> found", len(imgs), "images")
type ImageHash struct {
Image string
Hash uint64
}
imghashes := make([]ImageHash, 0, len(imgs))
{
imgchan := make(chan string)
imghashchan := make(chan ImageHash)
if _hashroutines < 0 {
_hashroutines = runtime.NumCPU()
}
var wg sync.WaitGroup
fmt.Println("> spawning", _hashroutines, "hash routines")
for i := 0; i < _hashroutines; i++ {
go func() {
for img := range imgchan {
hash, err := getHash(imghash.Average, filepath.Join(_dir, img))
if err != nil {
log.Fatal(err)
}
imghashchan <- ImageHash{Image: img, Hash: hash}
}
wg.Done()
}()
wg.Add(1)
}
go func() {
for _, img := range imgs {
imgchan <- img
}
close(imgchan)
wg.Wait()
close(imghashchan)
}()
fmt.Println("> calculating hashes")
bar := pb.New(len(imgs))
bar.ManualUpdate = true
bar.Start()
for imghash := range imghashchan {
imghashes = append(imghashes, imghash)
bar.Add(1)
bar.Update()
}
bar.Finish()
}
fmt.Println("> calculating similarities")
var out io.Writer
if _out != "" {
fd, err := os.OpenFile(_out, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0750)
if err != nil {
log.Fatal(err)
}
defer fd.Close()
out = fd
} else {
out = os.Stdout
}
for i, ih := range imghashes {
for e, oih := range imghashes {
if i == e {
continue
}
d := imghash.Distance(ih.Hash, oih.Hash)
if d < _distance {
fmt.Fprintln(out, ih.Image, oih.Image, "->", d)
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment