#!/usr/bin/env python
import math
import sys

import numpy as np
from moviepy.editor import AudioClip, VideoFileClip, concatenate_videoclips

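# Edits a recording in two passes: first keep or drop segments based on
# colored markers found in the frames (a green patch means "keep the segment
# just recorded", a red patch means "drop it"), then cut the dead air out of
# whatever remains.
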
# Get the average RGB of a rectangular patch of a frame. moviepy frames are
# numpy uint8 arrays of shape (H, W, 3), so average in float to avoid
# overflowing uint8 while summing pixel values.
# Assumes x1 < x2, y1 < y2.
def avg_rgb(frame, x1, y1, x2, y2):
    patch = frame[x1:x2, y1:y2].astype(float)
    avg_r, avg_g, avg_b = patch.mean(axis=(0, 1))
    return avg_r, avg_g, avg_b

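# A quick sanity check for avg_rgb (a hypothetical helper, not part of the
# original script): the sampled patch of a synthetic solid-red frame should
# average to pure red. Handy when tuning the color thresholds used below.
def _check_avg_rgb():
    frame = np.zeros((200, 200, 3), dtype=np.uint8)
    frame[:, :, 0] = 200  # solid red, well above the avg_r > 120 threshold
    assert avg_rgb(frame, 100, 100, 110, 110) == (200.0, 0.0, 0.0)
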
# Look for red/green marker colors in each frame and edit based on them.
# Returns a list of [start, end] time intervals (in seconds) to keep.
def color_edit(video):
    # Classify every frame: 'c' = content, 'y' = green marker ("keep the
    # interval before this"), 'n' = red marker ("drop the interval before
    # this").
    frame_marker = []
    for frame in video.iter_frames():
        avg_r, avg_g, avg_b = avg_rgb(frame, 100, 100, 110, 110)
        is_red = (avg_r > 120) and (avg_g < 50) and (avg_b < 50)
        is_green = (avg_r < 50) and (avg_g > 120) and (avg_b < 50)
        if is_red:
            frame_marker.append('n')
        elif is_green:
            frame_marker.append('y')
        else:
            frame_marker.append('c')
    return markers_to_intervals(frame_marker, video.fps)

# Turn the per-frame marker list into the [start, end] intervals to keep.
def markers_to_intervals(frame_marker, fps):
    keep_start = 0
    keep_intervals = []
    start_of_last_green = 0
    for i in range(1, len(frame_marker)):
        m1 = frame_marker[i - 1]
        m2 = frame_marker[i]
        # Content followed by green: remember where the green block started.
        if m1 == 'c' and m2 == 'y':
            start_of_last_green = i
        # Green followed by content: keep the interval up to the start of the
        # green block, then start a (possible) new interval.
        if m1 == 'y' and m2 == 'c':
            keep_intervals.append([keep_start, start_of_last_green / fps])
            keep_start = (i + 1) / fps
        # Red followed by content: drop the previous interval and start a
        # (possible) new one.
        if m1 == 'n' and m2 == 'c':
            keep_start = i / fps
    # Ended on content, or on green with no content after it: keep the final
    # interval.
    last_index = len(frame_marker) - 1
    if last_index >= 0 and frame_marker[last_index] in ('c', 'y'):
        keep_intervals.append([keep_start, last_index / fps])
    return keep_intervals

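# A worked example of the marker scan above (hypothetical, not part of the
# original script), assuming a 10 fps marker stream: frames 0-1 are content
# closed out by the green block at frames 2-3, frame 4 is content discarded
# by the red block at frames 5-6, and frames 7-8 are content kept to the end.
def _check_markers_to_intervals():
    markers = ['c', 'c', 'y', 'y', 'c', 'n', 'n', 'c', 'c']
    assert markers_to_intervals(markers, 10.0) == [[0, 0.2], [0.7, 0.8]]
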
# Iterate over the audio to find the non-silent parts. Outputs a list of
# [speaking_start, speaking_end] intervals.
# Args:
#   window_size: (in seconds) hunt for silence in windows of this size
#   volume_threshold: volume below this threshold counts as silence
#   ease_in: (in seconds) pad each speaking interval with this much margin
def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.02, ease_in=0.1, audio_fps=44100):
    # First, flag every fixed-size window of audio as silent or not.
    num_windows = math.floor(audio_clip.end / window_size)
    window_is_silent = []
    for i in range(num_windows):
        s = audio_clip.subclip(i * window_size, (i + 1) * window_size).set_fps(audio_fps)
        window_is_silent.append(s.max_volume() < volume_threshold)

    # Then turn the silence flags into speaking intervals by watching for
    # silence -> speaking and speaking -> silence transitions.
    speaking_start = 0
    speaking_intervals = []
    for i in range(1, len(window_is_silent)):
        e1 = window_is_silent[i - 1]
        e2 = window_is_silent[i]
        # silence -> speaking
        if e1 and not e2:
            speaking_start = i * window_size
        # speaking -> silence: we now have a complete speaking interval
        if not e1 and e2:
            speaking_end = i * window_size
            new_interval = [max(0, speaking_start - ease_in),
                            min(audio_clip.end, speaking_end + ease_in)]
            # With tiny windows the ease_in padding can overlap the previous
            # interval, so merge rather than append.
            if speaking_intervals and speaking_intervals[-1][1] > new_interval[0]:
                speaking_intervals[-1][1] = new_interval[1]
            else:
                speaking_intervals.append(new_interval)
    # If the audio ends while still speaking, close out the final interval.
    if window_is_silent and not window_is_silent[-1]:
        tail = [max(0, speaking_start - ease_in), audio_clip.end]
        if speaking_intervals and speaking_intervals[-1][1] > tail[0]:
            speaking_intervals[-1][1] = tail[1]
        else:
            speaking_intervals.append(tail)
    return speaking_intervals

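# Self-check for find_speaking (a hypothetical helper, not part of the
# original script): a 3 s mono clip that is silent except for a 440 Hz tone
# between t=1 and t=2 should yield one speaking interval of roughly
# [0.9, 2.1] (the tone plus the ease_in margin on each side).
def _check_find_speaking():
    tone = AudioClip(
        lambda t: np.sin(2 * np.pi * 440 * t) * ((t >= 1.0) & (t < 2.0)),
        duration=3.0, fps=44100)
    print(find_speaking(tone))  # expect roughly [[0.9, 2.1]]
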
def main():
    # Parse args: input file path, output file path.
    if len(sys.argv) != 3:
        sys.exit("Usage: {} <input_video> <output_video>".format(sys.argv[0]))
    file_in = sys.argv[1]
    file_out = sys.argv[2]

    vid = VideoFileClip(file_in)

    # Pass 1: color edit.
    intervals_to_keep = color_edit(vid)
    print("Keeping color edit intervals: " + str(intervals_to_keep))
    keep_clips = [vid.subclip(start, end) for [start, end] in intervals_to_keep]
    color_edited_video = concatenate_videoclips(keep_clips)

    # Pass 2: cut out dead air.
    speaking_intervals = find_speaking(color_edited_video.audio, audio_fps=vid.audio.fps)
    print("Keeping speaking intervals: " + str(speaking_intervals))
    speaking_clips = [color_edited_video.subclip(start, end) for [start, end] in speaking_intervals]
    final_video = concatenate_videoclips(speaking_clips)

    final_video.write_videofile(file_out,
                                #fps=60,
                                preset='ultrafast',
                                codec='libx264',
                                temp_audiofile='temp-audio.m4a',
                                remove_temp=True,
                                audio_codec="aac",
                                threads=6)
    vid.close()

if __name__ == '__main__':
    main()