smartcrop with Vips and Python
#!/usr/bin/python

# smartcrop with libvips, based very roughly on
# https://github.com/jwagner/smartcrop.js

import sys

import gi
gi.require_version('Vips', '8.0')
from gi.repository import Vips

image = Vips.Image.new_from_file(sys.argv[1])

# options

# step crops by this much ... we downsample the feature images by this as well,
# so larger values make the search much quicker
step = 8

# write an image to a memory buffer ... can give a speedup if you know an
# image is small and will be reused a lot
def memoryise(image):
    new_image = Vips.Image.new_memory()
    image.write(new_image)
    return new_image

# sobel filter ... use this to find edges
sobel_hmask = Vips.Image.new_from_array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
sobel_vmask = sobel_hmask.rot90()

def sobel_filter(image):
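    # approximate the gradient magnitude as |Gx| + |Gy| rather than
    # sqrt(Gx ** 2 + Gy ** 2) ... cheaper and accurate enough for scoring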
    return image.conv(sobel_hmask).abs() + image.conv(sobel_vmask).abs()

# areas of skin colour ... threshold in LCh
def skin_filter(image):
    lch = image.colourspace(Vips.Interpretation.LCH)
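    # each comparison makes a 0 / 255 image per band ... bandand() ANDs the
    # bands together, so the result is 255 only where L, C and h all fall in
    # the skin-ish range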
    return ((lch > [5, 5, 0]) & (lch < [80, 30, 80])).bandand()

# areas high in saturation
def saturation_filter(image):
    c = image.colourspace(Vips.Interpretation.LCH)[1]
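    # band 1 of LCh is chroma ... anything over 30 counts as saturated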
    return c > 30

# feature-detect with an M x N box ... we look for a feature in the centre and
# no feature at the edges, ie. -0.1, 1, -0.1, in thirds.
def feature_detect(image, M, N):
    print 'feature-detect M =', M, ', N =', N
    third = int(M / 3)
    mask = third * [-0.1] + third * [1] + third * [-0.1]
    hmask = Vips.Image.new_from_array(mask, scale = third)

    third = int(N / 3)
    mask = third * [-0.1] + third * [1] + third * [-0.1]
    vmask = Vips.Image.new_from_array(mask, scale = third).rot90()
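    # convolve with the horizontal and vertical box masks and sum the two
    # responses ... the result is brightest where the centre of an M x N
    # window is feature-rich and its edges are not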
    return image.conv(hmask) + image.conv(vmask)

# make three base feature images
# downsample by step, the resolution we search for crops at
tiny = memoryise(image.shrink(step, step))
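# run the edge detector on band 1 only (green, for an RGB image)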
edges = sobel_filter(tiny[1])
skin = skin_filter(tiny)
saturation = saturation_filter(tiny)

# weight the three factors to make a score image
score = edges + 0.5 * skin + 0.5 * saturation

# put a 1 pixel black line around the score image ... we are going to blur at
# various scales and we don't want high edge values to be extended
score = score.embed(1, 1, score.width + 2, score.height + 2, extend = "black")
score = memoryise(score)

score.write_to_file("score.v")
edges.write_to_file("edges.v")
skin.write_to_file("skin.v")
saturation.write_to_file("saturation.v")

# now feature-detect the score image at various scales and search for
# maxima ... they will give the position of the centre of the crops at
# that scale

# default to a square crop
crop_width = min(image.width, image.height)
crop_height = min(image.width, image.height)

best_score = 0
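# not in the original script: start from a centred square crop so best_x and
# friends are always defined, even if no candidate scores above zero
best_x = (image.width - crop_width) / 2
best_y = (image.height - crop_height) / 2
best_w = crop_width
best_h = crop_height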

# search from small crops to large ones ... from 1/2 minimum dimension up
for scale in range(5, 11):
    width = (crop_width * scale) / 10
    height = (crop_height * scale) / 10
    # search for the brightest rectangle
    features = feature_detect(score, width / step, height / step)
    v, x, y = features.maxpos()

    print 'width =', width, 'height =', height
    print 'x =', x * step, 'y =', y * step
    print 'score =', v

    if v > best_score:
        print 'new best score', v
        best_score = v
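        # maxpos() gives the crop centre in the downsampled score image ...
        # subtract 1 for the black border we embedded, scale back up by step,
        # then move from the centre to the top-left corner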
        best_x = (x - 1) * step - width / 2
        best_y = (y - 1) * step - height / 2
        best_w = width
        best_h = height

# clip against image edges
if best_x < 0:
    best_w += best_x
    best_x = 0
if best_y < 0:
    best_h += best_y
    best_y = 0
if best_x + best_w > image.width:
    best_w = image.width - best_x
if best_y + best_h > image.height:
    best_h = image.height - best_y

features.write_to_file("features.v")

print 'best crop:'
print 'x =', best_x, 'y =', best_y
print 'w =', best_w, 'h =', best_h
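
# a possible extension, not part of the original script: cut out the winning
# crop and save it ... this assumes an optional second command-line argument
# naming the output file; extract_area(left, top, width, height) is the
# standard libvips cropping operation
if len(sys.argv) > 2:
    crop = image.extract_area(int(best_x), int(best_y), int(best_w), int(best_h))
    crop.write_to_file(sys.argv[2])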