Created
April 14, 2011 01:21
-
-
Save samuelclay/918751 to your computer and use it in GitHub Desktop.
Algorithm to find the dominant color in an image.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pprint import pprint

import numpy as np
import scipy
import scipy.cluster
from PIL import Image
# Find the dominant color of an image.
#
# The pixels are clustered with k-means, centroids that are close to black or
# close to white are discarded (when possible), every pixel is assigned to the
# nearest remaining centroid, and the centroid that owns the most pixels wins.
# The per-color share of pixels is pretty-printed; the winning color ends up in
# ``peak`` (channel values) and ``color`` (hex string).

image = Image.open('logo_newsblur_512.png')
NUM_CLUSTERS = 15


def _to_hex(code):
    """Return the lowercase hex string for a sequence of 0-255 channel values.

    Centroids are floats, so each channel is rounded to the nearest integer
    before being formatted as two hex digits.
    """
    return ''.join('%02x' % int(round(float(c))) for c in code)


# The dark/light filter below reads three channels per centroid, so greyscale,
# palette and other modes are converted to RGB first. RGB and RGBA images are
# left untouched (for RGBA the alpha channel takes part in the clustering).
if image.mode not in ('RGB', 'RGBA'):
    image = image.convert('RGB')

# Convert image into array of values for each point.
ar = np.asarray(image)
shape = ar.shape

# Reshape array of values to merge color bands: one row per pixel, one column
# per channel. Cast to float up front so kmeans and vq see the same dtype.
ar = ar.reshape(-1, shape[-1]).astype(float)

# Get NUM_CLUSTERS worth of centroids.
codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

# Pare centroids, removing blacks and whites and shades of really dark and
# really light. The thresholds are relaxed step by step; if even the loosest
# pair rejects every centroid, fall back to the unfiltered set.
original_codes = codes
for low, hi in [(60, 200), (35, 230), (10, 250)]:
    rgb = original_codes[:, :3]
    too_dark = (rgb < low).all(axis=1)
    too_light = (rgb > hi).all(axis=1)
    codes = original_codes[~(too_dark | too_light)]
    if len(codes):
        break
else:
    codes = original_codes

# Assign codes (vector quantization). Each vector is compared to the centroids
# and assigned the nearest one.
vecs, _ = scipy.cluster.vq.vq(ar, codes)

# Count occurrences of each clustered vector. bincount indexes directly by
# centroid number, so a centroid that received no pixels still gets its own
# (zero) slot instead of shifting the counts of the others.
counts = np.bincount(vecs, minlength=len(codes))

# Show colors for each code in its hex value, with its share of all pixels.
colors = [_to_hex(code) for code in codes]
total = int(counts.sum())
color_dist = dict(zip(colors, [float(count) / total for count in counts]))
pprint(color_dist)

# Find the most frequent color, based on the counts.
index_max = int(np.argmax(counts))
peak = codes[index_max]
color = _to_hex(peak)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice algorithm. Do you have any suggestions on how to return the second most frequent color? This is for the case when black is the most frequent and you wish to ignore it.
Thank you for sharing!