m4p · June 17, 2019 15:08
diff --git a/double_ender_sync.py b/double_ender_sync.py
 #!/usr/local/bin/python3
 from pydub import AudioSegment
 from pyAudioAnalysis import audioBasicIO as aIO
 from pyAudioAnalysis import audioSegmentation as aS
 import sys
 import numpy
 from scipy.io import wavfile
 from scipy.signal import fftconvolve

 def usage():
     """Terminate the script, reporting the expected command line.

     Raises:
         SystemExit: always, carrying the usage text as its exit message.
     """
     message = "Usage: double_ender_sync master.wav sync.wav sync2.wav ..."
     raise SystemExit(message)

 # Counter used to number the exported "synced-track<N>.wav" files.
 filenumber = 0

 # A master file plus at least one file to sync are required.
 if len(sys.argv) <= 2:
     usage()

 # Load the master recording and mix it down to a single channel.
 master = AudioSegment.from_wav(sys.argv[1]).set_channels(1)

 # Every remaining command-line argument is a track to align to the master.
 files_to_sync = sys.argv[2:]

 for filenumber, sync_filename in enumerate(files_to_sync, start=1):
     # Align one track against the master: pick a stretch of speech (the
     # "needle") from the track, find where the same audio occurs near the
     # start of the master (the "haystack"), then trim or pad the track by
     # the difference. Positions are in milliseconds unless noted otherwise.
     print("Syncing %s to %s" % (sync_filename, sys.argv[1]))

     sync = AudioSegment.from_file(sync_filename).set_channels(1)

     # First reduce file sizes by only looking at relevant areas. The window
     # is the difference in length between the two recordings plus 5% slack.
     offset = abs(len(sync) - len(master)) * 1.05

     # If there's less than 5 minutes difference, give us a bit more headroom.
     offset = max(offset, 5 * 60 * 1000)

     search_area = master[:offset * 2]
     sample_area = sync[offset:10 * 60 * 1000 + offset]

     search_area.export("search_area.wav", format="wav")
     sample_area.export("sample_area.wav", format="wav")

     # Segment the sample area into speech bits and use the first one longer
     # than two seconds as the needle to locate within the search area.
     [Fs, x] = aIO.readAudioFile("sample_area.wav")
     segments = aS.silenceRemoval(x, Fs, 0.05, 0.05, 1.0, 0.8, False)

     needle_abs_index = None
     for timeidx in segments:
         # Segment limits are scaled by 1000 so they can be used as
         # millisecond slice positions (presumably they arrive in seconds).
         start = timeidx[0] * 1000
         end = timeidx[1] * 1000

         if end - start > 2 * 1000:
             # Position of the needle within the full sync track. Only the
             # chosen segment's start counts; the starts of skipped segments
             # must not be accumulated into the position.
             needle_abs_index = offset + start
             print("Found a needle")
             sample_area[start:end].export("needle.wav", format="wav")
             break

     if needle_abs_index is None:
         # Without this check a stale needle.wav left by a previous track
         # (or no file at all) would be read below.
         sys.exit("No speech segment longer than 2 seconds found in %s" % sync_filename)

     # Search code adapted from wavgrep.py (https://gist.github.com/patrakov/8a8095721ee81d49f16c)

     needle_rate, needle = wavfile.read("needle.wav")
     haystack_rate, haystack = wavfile.read("search_area.wav")

     if needle_rate != haystack_rate:
         sys.exit("Sample rates are not the same")

     needle = numpy.array(needle, dtype=numpy.float64)
     needle_len = len(needle)
     haystack = numpy.array(haystack, dtype=numpy.float64)
     haystack_len = len(haystack)

     if needle_len > haystack_len:
         # The sliding-window arrays below would have mismatched shapes.
         sys.exit("The needle is longer than the search area")

     needle_norm = needle.dot(needle)

     if needle_norm < 1000.0:
         sys.exit("The needle is almost silent")

     # Energy of every needle-sized window of the haystack, obtained from a
     # cumulative sum of squares so each window costs one subtraction.
     haystack_squared = numpy.hstack(([0.0], haystack * haystack))
     haystack_cum_norm = numpy.cumsum(haystack_squared)
     haystack_norm_at = (haystack_cum_norm[needle_len:haystack_len + 1]
                         - haystack_cum_norm[0:haystack_len + 1 - needle_len])

     # Convolving with the reversed needle yields the dot product of the
     # needle with every window; |window - needle|^2 then expands to
     # |window|^2 + |needle|^2 - 2 * (window . needle).
     correlation_at = fftconvolve(haystack, needle[::-1], mode='valid')
     difference_norm_at = haystack_norm_at + needle_norm - 2 * correlation_at

     # Sample index of the best-matching window in the haystack.
     at = numpy.argmin(difference_norm_at)

     # Calculate diffs and write synced file.

     needle_in_master = at / haystack_rate * 1000
     # Positive: the track has extra leading audio that must be cut.
     # Negative: the track started after the master and must be padded.
     time_offset = needle_abs_index - needle_in_master

     print("Absolute needle pos: %d" % needle_abs_index)
     print("The needle starts at ms: %d" % round(needle_in_master))
     print("Time Offset: %d seconds" % round(time_offset / 1000))

     if time_offset >= 0:
         synced = sync[time_offset:]
     else:
         # Trimming here would move the track further out of sync; prepend
         # silence so the needle lands at the same time as in the master.
         lead_in = AudioSegment.silent(duration=-time_offset,
                                       frame_rate=sync.frame_rate)
         synced = lead_in + sync
     synced.export("synced-track%d.wav" % filenumber, format="wav")
	#!/usr/local/bin/python3
	from pydub import AudioSegment
	from pyAudioAnalysis import audioBasicIO as aIO
	from pyAudioAnalysis import audioSegmentation as aS
	import sys
	import numpy
	from scipy.io import wavfile
	from scipy.signal import fftconvolve

	def usage():
	    """Exit the script with the usage message.

	    Raises:
	        SystemExit: always, carrying the usage text as its exit message.
	    """
	    sys.exit("Usage: double_ender_sync master.wav sync.wav sync2.wav ...")

	# A master file plus at least one file to sync are required.
	if len(sys.argv) < 3:
	    usage()

	# Load the master recording and mix it down to a single channel.
	master = AudioSegment.from_wav(sys.argv[1])
	master = master.set_channels(1)

	# Every remaining command-line argument is a track to align to the master.
	files_to_sync = sys.argv[2:]

	# Counter used to number the exported "synced-track<N>.wav" files.
	filenumber = 0

	for filenumber, sync_filename in enumerate(files_to_sync, start=1):
	    # Align one track against the master: pick a stretch of speech (the
	    # "needle") from the track, find where the same audio occurs near the
	    # start of the master (the "haystack"), then trim or pad the track by
	    # the difference. Positions are in milliseconds unless noted otherwise.
	    print("Syncing %s to %s" % (sync_filename, sys.argv[1]))

	    sync = AudioSegment.from_file(sync_filename).set_channels(1)

	    # First reduce file sizes by only looking at relevant areas. The window
	    # is the difference in length between the two recordings plus 5% slack.
	    offset = abs(len(sync) - len(master)) * 1.05

	    # If there's less than 5 minutes difference, give us a bit more headroom.
	    offset = max(offset, 5 * 60 * 1000)

	    search_area = master[:offset * 2]
	    sample_area = sync[offset:10 * 60 * 1000 + offset]

	    search_area.export("search_area.wav", format="wav")
	    sample_area.export("sample_area.wav", format="wav")

	    # Segment the sample area into speech bits and use the first one longer
	    # than two seconds as the needle to locate within the search area.
	    [Fs, x] = aIO.readAudioFile("sample_area.wav")
	    segments = aS.silenceRemoval(x, Fs, 0.05, 0.05, 1.0, 0.8, False)

	    needle_abs_index = None
	    for timeidx in segments:
	        # Segment limits are scaled by 1000 so they can be used as
	        # millisecond slice positions (presumably they arrive in seconds).
	        start = timeidx[0] * 1000
	        end = timeidx[1] * 1000

	        if end - start > 2 * 1000:
	            # Position of the needle within the full sync track. Only the
	            # chosen segment's start counts; the starts of skipped segments
	            # must not be accumulated into the position.
	            needle_abs_index = offset + start
	            print("Found a needle")
	            sample_area[start:end].export("needle.wav", format="wav")
	            break

	    if needle_abs_index is None:
	        # Without this check a stale needle.wav left by a previous track
	        # (or no file at all) would be read below.
	        sys.exit("No speech segment longer than 2 seconds found in %s" % sync_filename)

	    # Search code adapted from wavgrep.py (https://gist.github.com/patrakov/8a8095721ee81d49f16c)

	    needle_rate, needle = wavfile.read("needle.wav")
	    haystack_rate, haystack = wavfile.read("search_area.wav")

	    if needle_rate != haystack_rate:
	        sys.exit("Sample rates are not the same")

	    needle = numpy.array(needle, dtype=numpy.float64)
	    needle_len = len(needle)
	    haystack = numpy.array(haystack, dtype=numpy.float64)
	    haystack_len = len(haystack)

	    if needle_len > haystack_len:
	        # The sliding-window arrays below would have mismatched shapes.
	        sys.exit("The needle is longer than the search area")

	    needle_norm = needle.dot(needle)

	    if needle_norm < 1000.0:
	        sys.exit("The needle is almost silent")

	    # Energy of every needle-sized window of the haystack, obtained from a
	    # cumulative sum of squares so each window costs one subtraction.
	    haystack_squared = numpy.hstack(([0.0], haystack * haystack))
	    haystack_cum_norm = numpy.cumsum(haystack_squared)
	    haystack_norm_at = (haystack_cum_norm[needle_len:haystack_len + 1]
	                        - haystack_cum_norm[0:haystack_len + 1 - needle_len])

	    # Convolving with the reversed needle yields the dot product of the
	    # needle with every window; |window - needle|^2 then expands to
	    # |window|^2 + |needle|^2 - 2 * (window . needle).
	    correlation_at = fftconvolve(haystack, needle[::-1], mode='valid')
	    difference_norm_at = haystack_norm_at + needle_norm - 2 * correlation_at

	    # Sample index of the best-matching window in the haystack.
	    at = numpy.argmin(difference_norm_at)

	    # Calculate diffs and write synced file.

	    needle_in_master = at / haystack_rate * 1000
	    # Positive: the track has extra leading audio that must be cut.
	    # Negative: the track started after the master and must be padded.
	    time_offset = needle_abs_index - needle_in_master

	    print("Absolute needle pos: %d" % needle_abs_index)
	    print("The needle starts at ms: %d" % round(needle_in_master))
	    print("Time Offset: %d seconds" % round(time_offset / 1000))

	    if time_offset >= 0:
	        synced = sync[time_offset:]
	    else:
	        # Trimming here would move the track further out of sync; prepend
	        # silence so the needle lands at the same time as in the master.
	        lead_in = AudioSegment.silent(duration=-time_offset,
	                                      frame_rate=sync.frame_rate)
	        synced = lead_in + sync
	    synced.export("synced-track%d.wav" % filenumber, format="wav")