Last active
December 30, 2015 17:16
-
-
Save xjcl/51e0b5d1fb42c74dd923 to your computer and use it in GitHub Desktop.
Program to find occurrences of one audio clip inside another audio clip.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Program to find occurrences of one audio clip inside another audio clip.
# The example needs the video with id mPdcedZ9GxE; download it with youtube-dl first.
# WARNING: the example requires 3 GB of free RAM.
from moviepy.editor import * | |
from numpy import * | |
from scipy.signal import * | |
# poppy bros: 8200 10000 68 | |
def occurences(haystack, needle, threshold, peak_dist, needle_seconds):
    """Find where `needle` occurs inside `haystack`.

    Both inputs are single-channel audio signals given as 1-D sample arrays.
    The haystack is convolved with `needle` (pass the needle time-reversed to
    turn the convolution into a cross-correlation) and every cluster of
    samples whose absolute response exceeds `threshold` is reported once, at
    the first sample of the cluster.

    Parameters:
        haystack: 1-D array of samples to search in.
        needle: 1-D array of samples to search for.
        threshold: absolute convolution value a sample must exceed to count
            as a hit.
        peak_dist: gap, in samples, that must be exceeded between a hit and
            the hit before it for the later one to start a new occurrence.
        needle_seconds: despite its name, the duration in seconds covered by
            the whole *haystack*.  Hit positions are converted to seconds as
            ``index / len(haystack) * needle_seconds``.

    `threshold` and `peak_dist` depend on the signals and have to be tuned
    by hand for every haystack/needle pair.

    Returns:
        A tuple ``(times, strengths)`` of equally long arrays: the position
        of each occurrence in seconds and the absolute convolution value at
        that position.  Both are empty when nothing exceeds `threshold`.
    """
    # mode='same' yields one output value per haystack sample, so an index
    # into `corr` is also a position inside the haystack.
    corr = abs(fftconvolve(haystack, needle, mode='same'))
    peaks = where(corr > threshold)[0]

    # Pair every hit with the hit before it; the first hit gets -inf as its
    # predecessor so that it always opens a cluster.
    # NOTE(review): the source listing had lost its indentation; this assumes
    # the "previous hit" advanced on every hit, not only on reported ones.
    previous = concatenate(([-inf], peaks))[:-1]
    cluster_starts = peaks[peaks - previous > peak_dist]

    return (cluster_starts / len(corr) * needle_seconds, corr[cluster_starts])
if __name__ == '__main__':
    # Example: find every "shine collect" jingle in a Super Mario Sunshine
    # longplay.
    longplay_path = "[Longplay] Super Mario Sunshine - Part 1-mPdcedZ9GxE.mp4"
    jingle_path = "shine_collect.mp4"

    def mix_channels(samples):
        # Collapse a two-column sample array into a single channel by adding
        # the columns.  Whether a plain sum is the right way to do this has
        # not been verified; the search just needs a 1-D signal.
        return samples[:, 0] + samples[:, 1]

    # Haystack: the first 12 minutes of the longplay.
    longplay = VideoFileClip(longplay_path)
    clip = longplay.subclip((0, 0), (12, 0))

    # Needle: the four seconds from 3:18 to 3:22, written to its own file and
    # read back.  The file only has to be generated once.
    jingle = clip.subclip((3, 18), (3, 22))
    jingle.write_videofile(jingle_path)
    template = VideoFileClip(jingle_path)
    print('step 1/3 (loading haystack and needle) done')

    # Convolution is correlation with one signal reversed, so the needle is
    # flipped in time before it is handed to the search.
    needle_samples = mix_channels(template.audio.to_soundarray()[::-1])
    haystack_samples = mix_channels(clip.audio.to_soundarray())
    print('step 2/3 (converting audio to_soundarray) done')

    # Print result arrays in full instead of an abbreviated summary.
    set_printoptions(threshold=inf)
    hits = occurences(haystack_samples, needle_samples, 3500, 400000, 720)
    print(hits)
    print('step 3/3 (finding all needles in haystack) done')

    # Expected occurrences (mm:ss -> seconds):
    #    3:18 ->  198
    #   11:11 ->  671
    #   15:41 ->  941
    #   20:45 -> 1245
    # Observed result on the first 30 minutes of the video (that run used
    # about 12 GB of memory; the 12-minute excerpt works in about 3 GB):
    #   array([ 198.94752835, 671.8446712 , 941.03247165, 1245.3403855 ])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment