# simple audio sync example by Dan Stowell Nov 2018
import librosa  # lib... Rosa!
import os
import numpy as np

###############################################
maxlagsecs = 10   # the maximum offset between two audio files that will be considered
verbose = True
sr = 22050        # enforce a common sample rate
n_fft = 1024
hop_length = 512
###############################################
def ingest_a_wav_file(wavpath):
    "load a file, convert to mel spectrogram, limit to main speech range 100 Hz - 5000 Hz, normalise power"
    global sr, hop_length, n_fft
    y, sr = librosa.load(wavpath, sr=sr)
    # NB _spectrogram() is a private librosa helper; it returns the magnitude spectrogram plus the n_fft actually used
    spec, n_fft = librosa.core.spectrum._spectrogram(y=y, n_fft=n_fft, hop_length=hop_length)
    mel_basis = librosa.filters.mel(sr, n_fft=n_fft, fmin=100, fmax=5000)
    melspec = np.dot(mel_basis, spec)
    melspec /= np.sum(melspec * melspec)  # normalise total power
    if verbose:
        print("Loaded %s. Shape %s" % (wavpath, melspec.shape))
    return melspec  # shape (n_mels, t)
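
# An alternative spelling of ingest_a_wav_file() using only public librosa calls
# (a sketch, not part of the original gist): librosa.feature.melspectrogram()
# wraps the same STFT-plus-mel pipeline, and with power=1 it should give
# essentially the same magnitude mel spectrogram as the private _spectrogram()
# helper used above.
def ingest_a_wav_file_public_api(wavpath):
    global sr
    y, sr = librosa.load(wavpath, sr=sr)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length,
                                             power=1, fmin=100, fmax=5000)
    melspec /= np.sum(melspec * melspec)  # same power normalisation as above
    if verbose:
        print("Loaded %s. Shape %s" % (wavpath, melspec.shape))
    return melspec  # shape (n_mels, t)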
def compare_two_audios(spec1, spec2):
    # simple spectrogram cross-correlation -- evaluated over a limited range of relative lags
    # (in BOTH directions), and normalised by subtracting the means and dividing by the two stdevs
    global sr, hop_length, n_fft
    maxlagframes = librosa.core.time_to_frames(maxlagsecs, sr, hop_length, n_fft)
    shortestlen = min(spec1.shape[1], spec2.shape[1])
    offsetframes = range(-maxlagframes, maxlagframes + 1)
    result = [None for _ in offsetframes]
    # it's slightly more precise to do the mean-and-std normalisation within the subspecs
    # (on each loop iteration) but here we just do it once
    spec1 = (spec1 - np.mean(spec1)) / np.std(spec1)
    spec2 = (spec2 - np.mean(spec2)) / np.std(spec2)
    for resultpos, lag in enumerate(offsetframes):
        subspec1 = spec1[:, max(0, lag):shortestlen - max(0, -lag)]
        subspec2 = spec2[:, max(0, -lag):shortestlen - max(0, lag)]
        result[resultpos] = np.mean(subspec1 * subspec2)
    offsetsecs = librosa.core.frames_to_time(offsetframes, sr, hop_length)  # do not pass n_fft to librosa here, else it adds a time offset that's irrelevant here
    return offsetsecs, result
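
# Quick synthetic sanity check for compare_two_audios() (a sketch, not part of the
# original gist): make a random "spectrogram", delay a copy of it by a known number
# of frames, and confirm the cross-correlation peaks at the corresponding offset.
# With the arguments in this order the peak lands at a NEGATIVE offset, i.e. a
# negative offset means the second input lags (starts later than) the first.
def _selftest_compare_two_audios(delayframes=20):
    rng = np.random.RandomState(0)
    base = rng.rand(40, 2000)                     # shaped like a (n_mels, t) spectrogram
    delayed = np.roll(base, delayframes, axis=1)  # same content, shifted later in time
    offsetsecs, corrs = compare_two_audios(base, delayed)
    peakpos = int(np.argmax(corrs))
    expected = librosa.core.frames_to_time(-delayframes, sr, hop_length)
    print("Self-test: peak at offset %.3f s (expected about %.3f s)" % (offsetsecs[peakpos], expected))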
def compare_multi_wav_files(filelist):
    "given a list of wav files, load them and do all pairwise comparisons. returns a matrix in which each entry is a tuple (time offset in seconds, peak xcorr strength)"
    ret = [[None for _ in range(len(filelist))] for __ in range(len(filelist))]
    specs = [ingest_a_wav_file(fpath) for fpath in filelist]  # NB if you have loads of files this will exhaust RAM; instead, you'd only keep 2 in memory at a time
    for i in range(len(filelist)):
        for j in range(len(filelist)):
            offsets, corrs = compare_two_audios(specs[i], specs[j])
            # find the peak, and plop it into the results matrix
            peakpos = np.argmax(corrs)
            ret[i][j] = (offsets[peakpos], corrs[peakpos])
    return ret
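
# A memory-lean variant of compare_multi_wav_files() (a sketch, not in the original
# gist), following the RAM note in the comprehension above: rather than holding every
# spectrogram at once, re-load each pair on demand. Slower -- each file gets ingested
# repeatedly -- but memory use stays bounded at two spectrograms.
def compare_multi_wav_files_lowmem(filelist):
    ret = [[None for _ in range(len(filelist))] for __ in range(len(filelist))]
    for i in range(len(filelist)):
        spec_i = ingest_a_wav_file(filelist[i])
        for j in range(len(filelist)):
            spec_j = spec_i if j == i else ingest_a_wav_file(filelist[j])
            offsets, corrs = compare_two_audios(spec_i, spec_j)
            peakpos = np.argmax(corrs)
            ret[i][j] = (offsets[peakpos], corrs[peakpos])
    return ret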
####################################################
if __name__ == '__main__':
    # Two files which should match each other somehow, plus another that... shouldn't
    filelist = [
        '/home/dan/audio_misc/sample-audio-files-from-midsummer-nights-dream/original-full-files/d5F2.ogg.wav',
        '/home/dan/audio_misc/sample-audio-files-from-midsummer-nights-dream/original-full-files/Titania-dTHT.ogg.wav',
        '/home/dan/birdsong/BL_SoundsOfGardenBirds/17 Feral Pigeon - Song.aiff',
    ]
    results = compare_multi_wav_files(filelist)
    for i in range(len(filelist)):
        for j in range(len(filelist)):
            print("%s vs %s:\n  match strength %.1f %%, time offset %f s" % (
                os.path.basename(filelist[i]), os.path.basename(filelist[j]),
                results[i][j][1] * 100, results[i][j][0]))
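
    # A possible follow-up step (a sketch; the 0.5 threshold is a guess, not a value
    # from the original gist): flag pairs whose peak correlation strength exceeds some
    # level as likely matches, and say how to align them. A positive offset means the
    # shared content occurs later in the first file of the pair.
    match_threshold = 0.5  # hypothetical value -- tune on real data
    for i in range(len(filelist)):
        for j in range(i + 1, len(filelist)):
            offset, strength = results[i][j]
            if strength >= match_threshold:
                whichtotrim = "first" if offset > 0 else "second"
                print("Likely match: %s & %s -- trim %.2f s from the start of the %s file to align" % (
                    os.path.basename(filelist[i]), os.path.basename(filelist[j]), abs(offset), whichtotrim))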