Created
June 4, 2011 14:25
-
-
Save Cairnarvon/1007938 to your computer and use it in GitHub Desktop.
Tineye-style image search, locally. Everyone's a winner. (Requires PIL. --help for usage.)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import glob | |
import math | |
import os | |
import getopt | |
import sys | |
from PIL import Image | |
# Extensions globbed for when a directory is given on the command line.
# Both cases are listed because the glob patterns built from them are
# matched literally.  _parseopt rebinds this name when --exts is supplied.
EXTS = ('jpg', 'jpeg', 'JPG', 'JPEG', 'gif', 'GIF', 'png', 'PNG', 'bmp', 'BMP')

# Progress-bar modes understood by _progress: never draw it, always draw it,
# or draw it only for more than 10 files when stdout is a terminal.
PROG_NO, PROG_YES, PROG_SMART = 0, 1, 2
def avhash(im):
    """Compute the 64-bit average hash of an image.

    The image is shrunk to 8x8 and converted to greyscale, then every pixel
    is compared with the mean brightness.  Bit ``i`` of the result is set
    when pixel ``i`` (in ``getdata()`` order) is not darker than the mean.

    im -- a PIL ``Image.Image`` instance, or anything ``Image.open`` accepts
          (typically a file name).

    Returns a non-negative integer using at most 64 bits.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    # Pillow 10 removed Image.ANTIALIAS; LANCZOS is the same filter under its
    # current name.  Fall back to ANTIALIAS for old PIL releases that only
    # know the old name.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    im = im.resize((8, 8), resample).convert('L')
    pixels = list(im.getdata())
    avg = sum(pixels) / 64.
    result = 0
    for position, value in enumerate(pixels):
        if value >= avg:
            result |= 1 << position
    return result
def phash(im):
    """Compute a 64-bit perceptual hash of an image from a cosine transform.

    The image is shrunk to 32x32 and converted to greyscale.  The 8x8
    lowest-frequency coefficients of an (unnormalised) 2-D DCT-II are then
    computed, and each coefficient is compared with the mean of all
    coefficients except the first (the constant term).  Bit ``i`` of the
    result is set when coefficient ``i`` is not below that mean.

    im -- a PIL ``Image.Image`` instance, or anything ``Image.open`` accepts
          (typically a file name).

    Returns a non-negative integer using at most 64 bits.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    # Pillow 10 removed Image.ANTIALIAS; LANCZOS is the same filter under its
    # current name.  Fall back to ANTIALIAS for old PIL releases that only
    # know the old name.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    im = im.resize((32, 32), resample).convert('L')

    # Read the pixels once (row-major: index y * 32 + x) and tabulate the
    # 8 x 32 cosine factors instead of recomputing them for each of the
    # 64 * 1024 terms.  basis[k][n] == cos(pi / 32 * (n + .5) * k).
    pixels = list(im.getdata())
    basis = [[math.cos(math.pi / 32 * (n + .5) * k) for n in range(32)]
             for k in range(8)]

    seq = []
    for v in range(8):
        cos_v = basis[v]
        for u in range(8):
            cos_u = basis[u]
            seq.append(sum(pixels[y * 32 + x] * cos_u[x] * cos_v[y]
                           for x in range(32) for y in range(32)))

    # seq[0] is the u == v == 0 term, i.e. the plain sum of all pixels; it is
    # left out of the average that serves as the threshold.
    avg = sum(seq[1:]) / (len(seq) - 1)
    result = 0
    for position, value in enumerate(seq):
        if value >= avg:
            result |= 1 << position
    return result
def hamming(h1, h2):
    """Return the Hamming distance between two hash values.

    h1, h2 -- non-negative integers, such as the values produced by avhash
              and phash.

    The result is the number of bit positions in which the two differ.
    """
    # XOR leaves a 1 exactly where the hashes disagree; count those ones.
    return bin(h1 ^ h2).count('1')
def _compare(f, fs, algo=avhash, exts=EXTS, prog=PROG_SMART, thresh=64):
    """Print the candidates that are similar enough to the target image.

    f      -- the target image, in any form ``algo`` accepts.
    fs     -- sequence of candidate images to compare against the target.
    algo   -- hash function mapping an image to an integer.
    exts   -- accepted for interface compatibility; not used here.
    prog   -- progress mode handed to _progress.
    thresh -- largest Hamming distance that is still reported.

    Matches are written to stdout as "<distance>\\t<file>" lines, closest
    first.  A candidate that cannot be read is reported on stderr and
    skipped; failure to hash the target itself still propagates.
    """
    target_hash = algo(f)
    total = len(fs)
    results = []
    for done, candidate in enumerate(fs, 1):
        try:
            distance = hamming(algo(candidate), target_hash)
        except EnvironmentError as err:
            # Globs and directory scans select files by name only, so a
            # corrupt or non-image file can end up here.  One bad file should
            # not throw away the work already done on the others.
            sys.stderr.write("Skipping %s: %s\n" % (candidate, err))
        else:
            results.append((distance, candidate))
        _progress(done, total, prog)

    # Sort on the distance alone so equally distant files keep input order.
    results.sort(key=lambda item: item[0])
    for distance, candidate in results:
        if distance <= thresh:
            print("%d\t%s" % (distance, candidate))
def _expand_files(l):
    """Expand command-line path arguments into a flat list of file names.

    l -- list of paths or glob patterns; when empty, the current directory
         is used.

    A directory contributes the files directly inside it whose extension is
    listed in EXTS (subdirectories are not searched).  Anything else is
    treated as a glob pattern and contributes whatever it matches.
    """
    found = []
    for entry in (l or ['.']):
        if not os.path.isdir(entry):
            found += glob.glob(entry)
            continue
        for ext in EXTS:
            found += glob.glob('%s/*.%s' % (entry, ext))
    return found
def _help():
    """Write the usage message to stderr.

    The text is formatted with ANSI escapes (bold and underline) and filled
    in with the program name from sys.argv[0] and the current EXTS list.
    """
    # Kept pure ASCII on purpose: under Python 2 a non-ASCII unicode literal
    # raises UnicodeEncodeError when stderr is redirected to a file or pipe.
    usage = """\
\033[1mUSAGE\033[0m
\033[1m%s\033[0m [\033[4mOPTION\033[0m]... \033[4mIMAGE\033[0m [\033[4mIMAGE\033[0m|\033[4mDIR\033[0m]...
\033[1mDESCRIPTION\033[0m
Examine images to see how similar they are to a target image.
The first image specified is the target image. Every subsequent image is
examined to see if it matches. You may also provide directories instead, in
which case we'll look for images there (though not recursively through
subdirectories).
Output is the list of images sorted by similarity, plus a number between
0 and 64 indicating similarity (0 being identical).
If no extra images are supplied, we'll just look in the current directory.
Mandatory arguments to long options are mandatory for short options too.
\033[1m-h\033[0m, \033[1m--help\033[0m
Print this message and exit.
\033[1m-a\033[0m, \033[1m--algo\033[0m[\033[1mrithm\033[0m]=\033[4mALGO\033[0m
Specify hashing algorithm. (\033[1maverage\033[0m or \033[1mphash\033[0m; default: \033[1maverage\033[0m)
\033[1m--average\033[0m, \033[1m--avg\033[0m
Same as \033[1m--algo=average\033[0m.
\033[1m-e\033[0m, \033[1m--ext\033[0m[\033[1mension\033[0m]\033[1ms\033[0m=\033[4mEXTS\033[0m
Provide a comma-separated list of valid image extensions for directory
processing. (Ignored for images that are explicitly provided.)
Default: \033[1m%s\033[0m
\033[1m--phash\033[0m
Same as \033[1m--algo=phash\033[0m.
\033[1m-p\033[0m, \033[1m--progress\033[0m=\033[1my\033[0m[\033[1mes\033[0m]|\033[1mn\033[0m[\033[1mo\033[0m]|\033[1ms\033[0m[\033[1mmart\033[0m]
Display progress bar. (default: \033[1msmart\033[0m)
\033[1m-t\033[0m, \033[1m--threshold\033[0m=\033[4mN\033[0m
Display only images within Hamming distance \033[4mN\033[0m of target, where
0 <= N <= 64. (Lower is closer match; default: 64)
\033[1m--only-matches\033[0m
Same as \033[1m--threshold=10\033[0m.
""" % (sys.argv[0], ','.join(EXTS))
    sys.stderr.write(usage)
    # Trailing blank line, as the message has always ended with one.
    sys.stderr.write("\n")
def _parseopt(argv):
    """Parse the command line.

    argv -- the argument list without the program name (the caller passes
            sys.argv[1:]).

    Returns a tuple (algo, prog, thresh, target, candidates):
      algo       -- the selected hash function (avhash or phash)
      prog       -- one of PROG_NO, PROG_YES, PROG_SMART
      thresh     -- maximum Hamming distance to report
      target     -- first positional argument, or None if there is none
      candidates -- remaining positional arguments run through _expand_files

    Side effects: exits with status 2 on a getopt error, prints the help and
    exits with status 0 for -h/--help, and rebinds the module-level EXTS for
    -e/--exts/--extensions.  An invalid option value is reported on stderr
    and the previous setting is kept.
    """
    global EXTS
    algo, prog, thresh = avhash, PROG_SMART, 64
    try:
        optlist, args = getopt.getopt(argv, 'ha:p:t:e:',
                                      ['help',
                                       'algo=', 'algorithm=',
                                       'average', 'avhash', 'avg',
                                       'phash',
                                       'progress=',
                                       'threshold=', 'only-matches',
                                       'exts=', 'extensions='])
    except getopt.GetoptError as e:
        sys.stderr.write("%s\n" % e)
        sys.exit(2)

    for opt, arg in optlist:
        # getopt leaves the "=" in the value of a short option written like
        # "-a=phash"; dropping it presumably exists to accept that spelling.
        arg = arg.replace('=', '')
        if opt in ('--help', '-h'):
            _help()
            sys.exit(0)
        elif opt in ('--algo', '--algorithm', '-a'):
            if arg in ('avhash', 'average', 'avg'):
                algo = avhash
            elif arg == 'phash':
                algo = phash
            else:
                sys.stderr.write("Invalid algorithm: %s\n" % arg)
        elif opt in ('--average', '--avhash', '--avg'):
            algo = avhash
        elif opt == '--phash':
            algo = phash
        elif opt in ('--progress', '-p'):
            choice = arg.lower()
            if choice in ('yes', 'y', ''):
                prog = PROG_YES
            elif choice in ('no', 'n'):
                prog = PROG_NO
            elif choice in ('smart', 's', 'auto'):
                prog = PROG_SMART
            else:
                sys.stderr.write("Invalid option: progress=%s\n" % arg)
        elif opt in ('--threshold', '-t'):
            try:
                thresh = int(arg)
            except ValueError:
                sys.stderr.write("Invalid option: threshold=%s\n" % arg)
        elif opt in ('--exts', '--extensions', '-e'):
            EXTS = arg.split(',')
        elif opt == '--only-matches':
            thresh = 10

    if not args:
        args = [None]
    return algo, prog, thresh, args[0], _expand_files(args[1:])
def _progress(x, y, prog=PROG_YES):
    """Redraw a one-line progress bar on stdout.

    x    -- number of items processed so far.
    y    -- total number of items.
    prog -- PROG_YES always draws; PROG_SMART draws only when there are more
            than 10 items and stdout is a terminal; any other value draws
            nothing.

    The line starts with a carriage return so that successive calls
    overwrite each other; a newline is emitted once x reaches y.
    """
    if not y:
        # Nothing to measure progress against; avoids dividing by zero.
        return
    if prog == PROG_YES or (prog == PROG_SMART and y > 10 and sys.stdout.isatty()):
        perc, w = 100. * x / y, int(40. * x / y)
        bar = '#' * w + ' ' * (40 - w)
        sys.stdout.write('\rCalculating... [%s] %.2f%% (%d/%d)' % (bar, perc, x, y))
        if x == y:
            sys.stdout.write('\n')
        sys.stdout.flush()
if __name__ == '__main__':
    # Script entry point: parse the options, then either run the comparison
    # or, when no target image was named, show the help and fail.
    algo, prog, thresh, f, fs = _parseopt(sys.argv[1:])
    if f:
        _compare(f, fs, algo=algo, prog=prog, thresh=thresh)
    else:
        _help()
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment