Face-aware video cropping
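The script below uses ffmpeg to probe and decode the video, face_recognition to find a face in a sample frame, and then crops and scales the clip to a 320x360, 30 fps portrait framed around that face.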
#!/usr/bin/env python3
import click
import ffmpeg
import face_recognition
import numpy as np
from PIL import Image
def face_location(filename):
    """Return the first face bounding box found in a sample frame of the video."""
    probe = ffmpeg.probe(filename)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    width = int(video_stream['width'])
    height = int(video_stream['height'])
    # Grab one second of raw RGB frames starting at the 50-second mark.
    out, _ = (
        ffmpeg
        .input(filename, ss=50, t=1)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True)
    )
    # Raw frames are row-major, so the shape is (frames, height, width, 3).
    video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
    image = video[0, :, :, :]
    face_locations = face_recognition.face_locations(image)
    # face_recognition returns (top, right, bottom, left) tuples; assume at
    # least one face was found and use the first.
    return face_locations[0]
#print("I found {} faces in this photograph.".format(len(face_locations))) | |
#for face_location in face_locations: | |
# # Print the location of each face in this image | |
# top, right, bottom, left = face_location | |
# print("A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format(top, left, bottom, right)) | |
# # You can access the actual face itself like this: | |
# face_image = image[top:bottom, left:right] | |
# pil_image = Image.fromarray(face_image) | |
# pil_image.show() | |
@click.command()
@click.argument('filename', type=click.Path(exists=True))
@click.argument('output')
def crop(filename, output):
    probe = ffmpeg.probe(filename)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    width = int(video_stream['width'])
    height = int(video_stream['height'])
    print(video_stream)  # debug: inspect the stream metadata
    # Phone footage often stores rotation as metadata, so swap the reported
    # dimensions for 90/270-degree rotations.
    rotation = video_stream.get('tags', {}).get('rotate')
    if rotation == '90' or rotation == '270':
        height, width = width, height
    top, right, bottom, left = face_location(filename)
    # Target output size and aspect ratio.
    out_w = 320
    out_h = 360
    r = out_w / out_h
    # Crop the full width and whatever height preserves the output aspect
    # ratio, leaving roughly a third of the free space above the face.
    crop_to_h = width / r
    face_height = bottom - top
    above_face = (crop_to_h - face_height) / 3
    crop_y = max(0, top - above_face)
    print("Above face: {}".format(above_face))
    (
        ffmpeg
        .input(filename)
        .filter('tpad', start_duration=20)  # pad the start of the clip
        .filter('crop', width, crop_to_h, 0, crop_y)
        .filter('scale', out_w, out_h)
        .filter('fps', fps=30)
        .output('{}.mp4'.format(output))
        .overwrite_output()
        .run()
    )


if __name__ == '__main__':
    crop()
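Assuming the script is saved as crop.py (the name and the example filenames below are illustrative), it can be run as, for example:

python3 crop.py interview.mov interview-cropped

which reads interview.mov and writes interview-cropped.mp4, a 320x360, 30 fps clip framed around the detected face. The first argument must be an existing file; the second is used as the base name of the .mp4 that gets written.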
Thanks for this, I appreciate it! I used it as the basis for my own script, which saved me a good amount of time yesterday. I did run into issues with your specific approach to cropping/scaling: it would fail when attempting to crop certain dimension ratios.
The following was inspired by your code, but was able to handle the more ... unique ... input types I had to process.
https://gist.github.com/stefanpenner/ca0334f617acc02e67482947d1fcf7b0
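Those failures typically come from a crop rectangle that falls partly outside the frame, or from odd pixel dimensions that a yuv420p output rejects. A minimal sketch of one way to guard against both (the helper name and defaults are illustrative, not taken from either gist):

def safe_crop_box(frame_w, frame_h, face_top, face_bottom, out_w=320, out_h=360):
    aspect = out_w / out_h
    # Largest crop with the target aspect ratio that still fits inside the frame.
    crop_h = min(frame_h, frame_w / aspect)
    crop_w = crop_h * aspect
    # Put the face roughly a third of the way down the crop, then clamp the
    # offsets so the rectangle never leaves the frame.
    face_h = face_bottom - face_top
    y = max(0, min(face_top - (crop_h - face_h) / 3, frame_h - crop_h))
    x = (frame_w - crop_w) / 2
    # Round to even pixel counts, which yuv420p/libx264 output requires.
    return tuple(int(v) // 2 * 2 for v in (crop_w, crop_h, x, y))

With that, w, h, x, y = safe_crop_box(width, height, top, bottom) can feed .filter('crop', w, h, x, y) in place of the hard-coded crop geometry.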