Skip to content

Instantly share code, notes, and snippets.

@jcupitt
Last active November 1, 2022 01:30
Show Gist options
  • Save jcupitt/a27d449c280863619035 to your computer and use it in GitHub Desktop.
Save jcupitt/a27d449c280863619035 to your computer and use it in GitHub Desktop.
smartcrop with Vips and Python
#!/usr/bin/python
# smartcrop with libvips, based very roughly on
# https://github.com/jwagner/smartcrop.js
import sys
import gi
gi.require_version('Vips', '8.0')
from gi.repository import Vips
# Load the image to crop from the command line (argv[1]).
image = Vips.Image.new_from_file(sys.argv[1])
# options
# step crops by this much ... we downsample the feature images by this as well,
# so larger values make the search much quicker
step = 8
# Render an image into a memory buffer. Worth doing for small images that
# will be scanned many times during the search.
def memoryise(image):
    """Return `image` fully evaluated into a new memory image."""
    rendered = Vips.Image.new_memory()
    image.write(rendered)
    return rendered
# Sobel edge detector: 3x3 gradient masks in both directions.
sobel_hmask = Vips.Image.new_from_array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
sobel_vmask = sobel_hmask.rot90()

def sobel_filter(image):
    """Return the summed absolute Sobel responses ... bright at edges."""
    horizontal = image.conv(sobel_hmask).abs()
    vertical = image.conv(sobel_vmask).abs()
    return horizontal + vertical
# Detect areas of skin colour by thresholding a box in LCh colour space.
def skin_filter(image):
    """Return a mask that is set where the pixel falls in the skin box."""
    lch = image.colourspace(Vips.Interpretation.LCH)
    above_low = lch > [5, 5, 0]
    below_high = lch < [80, 30, 80]
    # bandand() folds the per-band tests into a single yes/no band
    return (above_low & below_high).bandand()
# Detect strongly-saturated areas: chroma (band 1 of LCh) over 30.
def saturation_filter(image):
    """Return a mask that is set where the pixel is highly saturated."""
    chroma = image.colourspace(Vips.Interpretation.LCH)[1]
    return chroma > 30
# feature-detect with an M x N box ... we look for a feature in the centre and
# no feature at the edges, ie. -1, 1, -1, in thirds.
def feature_detect(image, M, N):
print 'feature-detect M =', M, ', N =', N
third = int(M / 3)
mask = third * [-0.1] + third * [1] + third * [-0.1]
hmask = Vips.Image.new_from_array(mask, scale = third)
third = int(N / 3)
mask = third * [-0.1] + third * [1] + third * [-0.1]
vmask = Vips.Image.new_from_array(mask, scale = third).rot90()
return image.conv(hmask) + image.conv(vmask)
# Make the three base feature images, all computed on the input downsampled
# by `step` (the resolution the crop search runs at).
tiny = memoryise(image.shrink(step, step))
# edge strength is computed from band 1 only (presumably the green channel
# of an RGB input -- TODO confirm against the loader)
edges = sobel_filter(tiny[1])
skin = skin_filter(tiny)
saturation = saturation_filter(tiny)
# weight the three factors into a single score image; edges dominate
score = edges + 0.5 * skin + 0.5 * saturation
# put a 1 pixel black line around the score image ... we are going to blur at
# various scales and we don't want high edge values to be extended
score = score.embed(1, 1, score.width + 2, score.height + 2, extend = "black")
score = memoryise(score)
# debug output of the intermediate images (vips native .v format)
score.write_to_file("score.v")
edges.write_to_file("edges.v")
skin.write_to_file("skin.v")
saturation.write_to_file("saturation.v")
# now feature-detect the score image at various scales and search for
# maxima ... they will give the position of the centre of the crops at
# that scale
# default to a square crop
crop_width = min(image.width, image.height)
crop_height = min(image.width, image.height)
best_score = 0
# search from small crops to large ones .. from 1/2 minimum dimension up
for scale in range(5, 11):
width = (crop_width * scale) / 10
height = (crop_height * scale) / 10
# search for the brightest rectangle
features = feature_detect(score, width / step, height / step)
v, x, y = features.maxpos()
print 'width =', width, 'height =', height
print 'x =', x * step, 'y =', y * step
print 'score =', v
if v > best_score:
print 'new best score', v
best_score = v
best_x = (x - 1) * step - width / 2
best_y = (y - 1) * step - height / 2
best_w = width
best_h = height
# clip against image edges
if best_x < 0:
best_w += best_x
best_x = 0
if best_y < 0:
best_h += best_y
best_y = 0
if best_x + best_w > image.width:
best_w = image.width - best_x
if best_y + best_h > image.height:
best_h = image.height - best_y
features.write_to_file("features.v")
print 'best crop:'
print 'x =', best_x, 'y =', best_y
print 'w =', best_w, 'h =', best_h
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment