Created
August 5, 2024 04:31
-
-
Save sammlapp/7c7ebbbe265d2fd75f238d4e92f3d2be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""this script extracts a measurement of sound pressure level from an audio file | |
specifically, it | |
(1) calculates a spectrogram | |
(2) subtracts a spectral noise profile based on the Nth percentile value of each spectrogram row | |
(3) calibrates the spectrogram to sound pressure level using a calibration curve from a csv file | |
(4) reconstructs a time series of rms values from the spectrogram | |
(5) extracts the maximum value of rms and converts to dB | |
""" | |
from opensoundscape import Audio, Spectrogram | |
import scipy | |
import numpy as np | |
import librosa | |
spec_params = {"window_samples": 512, "hop_size": 256} | |
def calculate_calibrated_spec( | |
audio, | |
bandpass_range, | |
background_noise_pct=25, | |
): | |
""" | |
Args: | |
audio: opensoundscape.Audio object | |
bandpass_range: [low_f, high_f] to bandpass audio | |
calibration_curve: dictionary of {freq:dB} to convert dBFS to dB SPL re 20 uPa for | |
the digital recording; values are linearly interpolated onto the spectrogram's | |
frequency values | |
background_noise_pct: percentile of pixel to use as background noise estimate | |
- background noise spectrum is computed row-wise on the spectrogram | |
Returns: | |
max value | |
""" | |
# generate spectrogram | |
# these settings provide a specrogram that can be summed back to the original amplitude | |
# note the use of a rectangular window, 'magnitude' mode, and 'spectrum' scaling | |
f, t, scipy_spec = scipy.signal.spectrogram( | |
x=audio.samples, | |
fs=audio.sample_rate, | |
mode="magnitude", | |
scaling="spectrum", | |
nperseg=spec_params["window_samples"], | |
nfft=spec_params["window_samples"], | |
noverlap=spec_params["hop_size"], | |
window=np.ones(spec_params["window_samples"]), | |
return_onesided=True, | |
) | |
# bandpass the spectrogram to the desired frequency range (replace values outside bandpass range with 0) | |
for i, fi in enumerate(f): | |
if fi < bandpass_range[0] or fi > bandpass_range[1]: | |
scipy_spec[i, :] = 0 | |
# make opensoundscape Spectrogram object | |
s = Spectrogram( | |
scipy_spec, | |
f, | |
t, | |
window_samples=spec_params["window_samples"], | |
overlap_samples=spec_params["window_samples"] - spec_params["hop_size"], | |
audio_sample_rate=audio.sample_rate, | |
scaling="spectrum", | |
) | |
# estimate the background noise as the [`background_noise_pct`]th percentile of each row of the spectrogram | |
noise_spectrum = np.percentile(s.spectrogram, background_noise_pct, axis=1) | |
# row-wise subtraction of noise from spectrogram | |
noise_reduced = s.spectrogram - noise_spectrum[:, np.newaxis] | |
eps = 1e-12 | |
noise_reduced[noise_reduced < eps] = eps | |
# create new spectrogram object with noise-reduced values | |
# add small epsilon to avoid issues if we later take the log10 | |
noise_reduced_spec = s._spawn(spectrogram=noise_reduced + eps) | |
return noise_reduced_spec | |
path = "example.wav" | |
offset = 0 # seconds to start of clip from beginning of audio file | |
duration = 3 # seconds | |
bandpass_range = [3000, 5000] # Hz, frequency range of target sound to measure | |
audio = Audio.from_file(path, offset, duration) | |
noise_reduced_spec = calculate_calibrated_spec( | |
audio, | |
bandpass_range, | |
background_noise_pct=25, | |
) | |
# use frequency response curves to calibrate | |
# assues you have a csv with columns: `freq` (in Hz) and `spectrum` (in dB) giving the | |
# offset to convert from dBFS to dB SPL | |
fr = pd.read_csv("frequency_response_magnitude_scaling.csv") | |
# interpolate measured calibration curve values onto the frequencies of our spectrogram | |
calibration_interpolated = np.interp( | |
noise_reduced_spec.frequencies, fr.freq, fr.spectrum | |
) | |
# convert dB to linear coefficients | |
calibration_interpolated_linear = np.exp(10, calibration_interpolated / 20) | |
noise_reduced_logspec = noise_reduced_spec._spawn( | |
spectrogram=20 * np.log10(noise_reduced_spec.spectrogram) | |
) | |
# apply calibration offsets per frequency | |
s_calibrated = noise_reduced_logspec._spawn( | |
spectrogram=noise_reduced_logspec.spectrogram | |
* calibration_interpolated_linear[:, np.newaxis] | |
) | |
# (sum instead of multilply if using log values) | |
# convert spec to rms time series using librosa | |
rms_time_series = librosa.feature.rms( | |
S=s_calibrated.spectrogram * spec_params["window_samples"], | |
frame_length=spec_params["window_samples"], | |
hop_length=spec_params["hop_size"], | |
) | |
# extract max dBFS | |
max_dBFS = 20 * np.log10(rms_time_series.max() * np.sqrt(2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment