Cut a video file into silent and noisy clips (two output files), or into single-face segments and the remaining (non-single-face) segments
# requires https://github.com/Zulko/moviepy
# requires https://github.com/jiaaro/pydub
# requires https://github.com/ageitgey/face_recognition
from pydub import AudioSegment
from moviepy.editor import *
import face_recognition
import numpy as np
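
# Two entry points:
#   cut_to_relevant_video(input_file, output_name)    -> <name>_quiet.mp4 and <name>_noisy.mp4
#   cut_to_single_face_video(input_file, output_name) -> <name>_one_face.mp4 and <name>_NOT_one_face.mp4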
# output is in millisecond tuples
def silent_segments(seg):
    slice_size = 100  # analyze the audio in 100 ms slices
    gap_tolerance = 1500  # how many milliseconds of silence is ok
    gap_segments_tol = gap_tolerance / slice_size
    db_levels = [s.dBFS for s in seg[::slice_size]]
    # build the loudness distribution from slices that are not near-total silence
    sound_dist = [d for d in db_levels if d > -55]
    mu = np.mean(sound_dist)
    sigma = np.std(sound_dist)
    trim_threshold = mu - 2 * sigma  # anything 2 std devs below the mean counts as quiet
    quiet_segs = []
    running_count = 0
    for i, level in enumerate(db_levels):
        if level > trim_threshold:
            # a loud slice ends the current quiet run; keep the run if it was long enough
            if running_count >= gap_segments_tol:
                quiet_segs.append(((i - running_count) * slice_size, i * slice_size))
            running_count = 0
            continue
        running_count += 1
    # flush a quiet run that reaches the end of the audio
    if running_count >= gap_segments_tol:
        end = len(db_levels)
        quiet_segs.append(((end - running_count) * slice_size, end * slice_size))
    return quiet_segs
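
# Worked example: with slice_size=100 and gap_tolerance=1500, at least 15
# consecutive slices must sit below trim_threshold to count as silence. If
# slices 40 through 69 are all quiet and slice 70 is loud, the loop records
# the millisecond tuple (4000, 7000).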
def infer_segments_complement(input_segments, duration):
    # given sorted, non-overlapping segments, return the gaps between them
    complement_segments = []
    offset = 0
    for seg in input_segments:
        if seg[0] > offset:
            complement_segments.append((offset, seg[0]))
        offset = seg[1]
    if offset < duration:
        complement_segments.append((offset, duration))
    return complement_segments
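
# Example: infer_segments_complement([(2000, 5000)], 10000) returns
# [(0, 2000), (5000, 10000)] -- everything around the input segment.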
def cut_together(file, segments, units='milliseconds'):
    clips = []
    for segment in segments:
        t_start = float(segment[0])
        t_stop = float(segment[1])
        if units == 'milliseconds':
            # moviepy's subclip expects seconds
            t_start = t_start / 1000.0
            t_stop = t_stop / 1000.0
        clips.append(file.subclip(t_start, t_stop))
    return concatenate_videoclips(clips)
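
# Example: cut_together(clip, [(0, 1500), (3000, 4000)]) takes the spans
# 0.0-1.5 s and 3.0-4.0 s of `clip` and joins them into one clip.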
def cut_to_relevant_video(input_file, output_name):
    audio_from_file = AudioSegment.from_file(input_file, "mp4")
    quiet_segments = silent_segments(audio_from_file)
    noisy_segments = infer_segments_complement(quiet_segments, len(audio_from_file))
    full_file = VideoFileClip(input_file)
    quiet_clip = cut_together(full_file, quiet_segments)
    noisy_clip = cut_together(full_file, noisy_segments)
    quiet_clip.write_videofile("./%s_quiet.mp4" % output_name, temp_audiofile="temp-audio.m4a", remove_temp=True, codec="libx264", audio_codec="aac")
    noisy_clip.write_videofile("./%s_noisy.mp4" % output_name, temp_audiofile="temp-audio.m4a", remove_temp=True, codec="libx264", audio_codec="aac")
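
# Usage sketch (hypothetical filenames): cut_to_relevant_video("talk.mp4", "talk")
# writes ./talk_quiet.mp4 and ./talk_noisy.mp4.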
def find_one_face_segments(video_clip, slice_size_in_seconds=0.5):
    one_face_segs = []
    offset = 0
    currently_single_face = False
    start_point = 0
    while offset <= video_clip.duration:
        print(offset, "seconds.....")
        frame = video_clip.get_frame(offset)
        face_count = len(face_recognition.face_locations(frame))
        if currently_single_face and face_count != 1:  # stop interval
            currently_single_face = False
            one_face_segs.append((start_point, offset))
        elif not currently_single_face and face_count == 1:  # start interval
            currently_single_face = True
            start_point = offset
        offset += slice_size_in_seconds
    if currently_single_face:
        one_face_segs.append((start_point, video_clip.duration))
    return one_face_segs
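
# Example: with the default 0.5 s sampling, a clip where exactly one face is
# visible from 2.0 s through 6.0 s yields [(2.0, 6.5)] -- the end point is the
# first sampled offset where the face count is no longer one.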
def cut_to_single_face_video(input_file, output_name):
    video_clip = VideoFileClip(input_file)
    one_face_segments = find_one_face_segments(video_clip)
    other_segments = infer_segments_complement(one_face_segments, video_clip.duration)
    one_face_clip = cut_together(video_clip, one_face_segments, units='seconds')
    others_clip = cut_together(video_clip, other_segments, units='seconds')
    one_face_clip.write_videofile("./%s_one_face.mp4" % output_name, temp_audiofile="temp-audio.m4a", remove_temp=True, codec="libx264", audio_codec="aac")
    others_clip.write_videofile("./%s_NOT_one_face.mp4" % output_name, temp_audiofile="temp-audio.m4a", remove_temp=True, codec="libx264", audio_codec="aac")
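
# Minimal command-line sketch (an assumption, not part of the original gist):
# pass an input video and an output basename, then pick one of the two cuts.
if __name__ == "__main__":
    import sys
    source, name = sys.argv[1], sys.argv[2]  # e.g. python cut_video.py talk.mp4 talk
    cut_to_relevant_video(source, name)       # silence-based split
    # cut_to_single_face_video(source, name)  # or: face-count-based split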