Skip to content

Instantly share code, notes, and snippets.

@vivekhaldar
Last active September 8, 2024 13:51
Show Gist options
  • Save vivekhaldar/92368f35da2d8bb8f12734d840d11cfa to your computer and use it in GitHub Desktop.
Save vivekhaldar/92368f35da2d8bb8f12734d840d11cfa to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import math
import sys
from moviepy.editor import AudioClip, VideoFileClip, concatenate_videoclips
def avg_rgb(frame, x1, y1, x2, y2):
    """Return the average (r, g, b) color of a rectangular part of a frame.

    Pixels are read as ``frame[x, y, channel]`` for ``x1 <= x < x2`` and
    ``y1 <= y < y2``, with channels 0, 1, 2 taken as red, green and blue.

    Args:
        frame: Image indexable as ``frame[i, j, c]``; expected to be an
            H * W * 3 (rgb) array.
            NOTE(review): with an H * W * 3 layout, ``x`` selects the first
            (row) axis and ``y`` the second (column) axis -- confirm that
            this is the intended orientation for non-square regions.
        x1: First index (inclusive) along the first axis. Assumes x1 < x2.
        y1: First index (inclusive) along the second axis. Assumes y1 < y2.
        x2: End index (exclusive) along the first axis.
        y2: End index (exclusive) along the second axis.

    Returns:
        Tuple ``(avg_r, avg_g, avg_b)`` of per-channel averages.

    Raises:
        ZeroDivisionError: If ``(x2 - x1) * (y2 - y1)`` is zero.
    """
    r, g, b = 0, 0, 0
    for x in range(x1, x2):
        for y in range(y1, y2):
            # Convert each sample to a Python int before accumulating.
            # Adding fixed-width scalars (e.g. NumPy uint8) directly can keep
            # the running sum in that narrow type and wrap around at 256.
            r += int(frame[x, y, 0])
            g += int(frame[x, y, 1])
            b += int(frame[x, y, 2])
    total_pixels = (x2 - x1) * (y2 - y1)
    avg_r = r / total_pixels
    avg_g = g / total_pixels
    avg_b = b / total_pixels
    return avg_r, avg_g, avg_b
def color_edit(video):
    """Look for color markers in the frames and derive the intervals to keep.

    Every frame is classified from the average color returned by
    ``avg_rgb(frame, 100, 100, 110, 110)``:

    * ``'n'`` -- red marker: drop the interval that preceded it.
    * ``'y'`` -- green marker: keep the interval that preceded it.
    * ``'c'`` -- anything else: regular content.

    Args:
        video: Clip providing ``iter_frames()`` and ``fps``.

    Returns:
        List of ``[start, end]`` pairs (seconds) of the intervals to keep.
        Intervals with no positive length are not reported.
    """
    frame_marker = []
    # Iterate over every frame.
    for frame in video.iter_frames():
        avg_r, avg_g, avg_b = avg_rgb(frame, 100, 100, 110, 110)
        is_red = (avg_r > 120) and (avg_g < 50) and (avg_b < 50)
        is_green = (avg_r < 50) and (avg_g > 120) and (avg_b < 50)
        if is_red:
            frame_marker.append('n')
        elif is_green:
            frame_marker.append('y')
        else:
            frame_marker.append('c')

    keep_intervals = []
    if not frame_marker:
        # No frames at all: nothing to keep.
        return keep_intervals

    fps = video.fps

    def keep(start, end):
        # Skip empty or inverted intervals; they would become zero-length
        # (or invalid) subclips downstream.
        if end > start:
            keep_intervals.append([start, end])

    keep_start = 0
    start_of_last_green = 0
    for i in range(1, len(frame_marker)):
        m1 = frame_marker[i - 1]
        m2 = frame_marker[i]
        if m1 == 'c' and m2 == 'y':
            # Content followed by green, take note of where the green began.
            start_of_last_green = i
        elif m1 == 'y' and m2 == 'c':
            # Green followed by content. Keep the previous interval (up to
            # the start of the green run) and start a (possible) new one.
            keep(keep_start, start_of_last_green / fps)
            # NOTE(review): this restarts one frame later than the red case
            # below (i + 1 vs i); kept as-is in case it is deliberate.
            keep_start = (i + 1) / fps
        elif m1 == 'n' and m2 == 'c':
            # Red followed by content. Drop the previous interval and start
            # a (possible) new one.
            keep_start = i / fps

    last_index = len(frame_marker) - 1
    last_marker = frame_marker[last_index]
    if last_marker == 'c':
        # Trailing content with no marker after it is kept.
        keep(keep_start, last_index / fps)
    elif last_marker == 'y':
        # Ending on green with no following content: keep the interval up to
        # where the green run began, so the marker frames are not included.
        keep(keep_start, start_of_last_green / fps)
    return keep_intervals
def find_speaking(audio_clip, window_size=0.1, volume_threshold=0.02, ease_in=0.1, audio_fps=44100):
    """Iterate over audio to find the non-silent parts.

    The audio is split into consecutive windows of ``window_size`` seconds;
    a window is silent when its ``max_volume()`` is below
    ``volume_threshold``. Runs of non-silent windows become speaking
    intervals, padded by ``ease_in`` on both sides and merged when the
    padding makes them overlap. A run that lasts until the final window is
    closed at the end of the clip, so trailing speech is not lost.

    Args:
        audio_clip: Audio providing ``end`` (seconds) and
            ``subclip(start, end)``.
        window_size: (in seconds) hunt for silence in windows of this size.
        volume_threshold: volume below this threshold is considered to be
            silence.
        ease_in: (in seconds) add this much silence around speaking
            intervals.
        audio_fps: Frame rate set on each window before measuring volume.

    Returns:
        List of ``[speaking_start, speaking_end]`` pairs in seconds, clamped
        to ``[0, audio_clip.end]``.
    """
    clip_end = audio_clip.end

    # First, iterate over audio to find all silent windows.
    num_windows = math.floor(clip_end / window_size)
    window_is_silent = []
    for i in range(num_windows):
        s = audio_clip.subclip(i * window_size, (i + 1) * window_size).set_fps(audio_fps)
        window_is_silent.append(s.max_volume() < volume_threshold)

    speaking_intervals = []

    def add_interval(start, end):
        # Pad with ease_in but never reach outside the clip.
        padded = [max(0, start - ease_in), min(clip_end, end + ease_in)]
        # With tiny windows the padding can overlap the previous interval,
        # so merge instead of emitting overlapping intervals.
        if speaking_intervals and speaking_intervals[-1][1] > padded[0]:
            speaking_intervals[-1] = [speaking_intervals[-1][0], padded[1]]
        else:
            speaking_intervals.append(padded)

    # Find speaking intervals. Starting at 0 covers audio that begins with
    # speech (there is no silence -> speaking transition in that case).
    speaking_start = 0
    for i in range(1, len(window_is_silent)):
        e1 = window_is_silent[i - 1]
        e2 = window_is_silent[i]
        if e1 and not e2:
            # silence -> speaking
            speaking_start = i * window_size
        elif not e1 and e2:
            # speaking -> silence, now have a speaking interval
            add_interval(speaking_start, i * window_size)

    # Still speaking in the last window: close the open interval at the end
    # of the clip.
    if window_is_silent and not window_is_silent[-1]:
        add_interval(speaking_start, clip_end)

    return speaking_intervals
def main():
    """Command-line entry point: ``<script> INPUT OUTPUT``.

    Reads the input video, keeps only the intervals selected by
    ``color_edit``, then cuts the remaining dead air using
    ``find_speaking`` and writes the result to the output path.

    Exits with a message when the arguments are missing or when either
    editing pass leaves nothing to write.
    """
    # Parse args: input file path, then output file path.
    if len(sys.argv) < 3:
        sys.exit("Usage: {} <input-video> <output-video>".format(sys.argv[0]))
    file_in = sys.argv[1]
    file_out = sys.argv[2]

    vid = VideoFileClip(file_in)
    try:
        # Color edit.
        intervals_to_keep = color_edit(vid)
        print("Keeping color edit intervals: " + str(intervals_to_keep))
        if not intervals_to_keep:
            sys.exit("No intervals to keep after color edit; nothing to write.")
        keep_clips = [vid.subclip(start, end) for [start, end] in intervals_to_keep]
        color_edited_video = concatenate_videoclips(keep_clips)

        # Cut out dead air.
        speaking_intervals = find_speaking(color_edited_video.audio, audio_fps=vid.audio.fps)
        print("Keeping speaking intervals: " + str(speaking_intervals))
        if not speaking_intervals:
            sys.exit("No speaking intervals found; nothing to write.")
        speaking_clips = [color_edited_video.subclip(start, end) for [start, end] in speaking_intervals]
        final_video = concatenate_videoclips(speaking_clips)

        final_video.write_videofile(file_out,
            #fps=60,
            preset='ultrafast',
            codec='libx264',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
            audio_codec="aac",
            threads=6
        )
    finally:
        # Release the reader even when editing or encoding fails.
        vid.close()


if __name__ == '__main__':
    main()
@Tenetri
Copy link

Tenetri commented May 3, 2022

really cool code! well done!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment