Skip to content

Instantly share code, notes, and snippets.

@sammlapp
Created August 5, 2024 04:31
Show Gist options
  • Save sammlapp/7c7ebbbe265d2fd75f238d4e92f3d2be to your computer and use it in GitHub Desktop.
Save sammlapp/7c7ebbbe265d2fd75f238d4e92f3d2be to your computer and use it in GitHub Desktop.
"""this script extracts a measurement of sound pressure level from an audio file
specifically, it
(1) calculates a spectrogram
(2) subtracts a spectral noise profile based on the Nth percentile value of each spectrogram row
(3) calibrates the spectrogram to sound pressure level using a calibration curve from a csv file
(4) reconstructs a time series of rms values from the spectrogram
(5) extracts the maximum value of rms and converts to dB
"""
from opensoundscape import Audio, Spectrogram
import scipy
import numpy as np
import librosa
spec_params = {"window_samples": 512, "hop_size": 256}
def calculate_calibrated_spec(
audio,
bandpass_range,
background_noise_pct=25,
):
"""
Args:
audio: opensoundscape.Audio object
bandpass_range: [low_f, high_f] to bandpass audio
calibration_curve: dictionary of {freq:dB} to convert dBFS to dB SPL re 20 uPa for
the digital recording; values are linearly interpolated onto the spectrogram's
frequency values
background_noise_pct: percentile of pixel to use as background noise estimate
- background noise spectrum is computed row-wise on the spectrogram
Returns:
max value
"""
# generate spectrogram
# these settings provide a specrogram that can be summed back to the original amplitude
# note the use of a rectangular window, 'magnitude' mode, and 'spectrum' scaling
f, t, scipy_spec = scipy.signal.spectrogram(
x=audio.samples,
fs=audio.sample_rate,
mode="magnitude",
scaling="spectrum",
nperseg=spec_params["window_samples"],
nfft=spec_params["window_samples"],
noverlap=spec_params["hop_size"],
window=np.ones(spec_params["window_samples"]),
return_onesided=True,
)
# bandpass the spectrogram to the desired frequency range (replace values outside bandpass range with 0)
for i, fi in enumerate(f):
if fi < bandpass_range[0] or fi > bandpass_range[1]:
scipy_spec[i, :] = 0
# make opensoundscape Spectrogram object
s = Spectrogram(
scipy_spec,
f,
t,
window_samples=spec_params["window_samples"],
overlap_samples=spec_params["window_samples"] - spec_params["hop_size"],
audio_sample_rate=audio.sample_rate,
scaling="spectrum",
)
# estimate the background noise as the [`background_noise_pct`]th percentile of each row of the spectrogram
noise_spectrum = np.percentile(s.spectrogram, background_noise_pct, axis=1)
# row-wise subtraction of noise from spectrogram
noise_reduced = s.spectrogram - noise_spectrum[:, np.newaxis]
eps = 1e-12
noise_reduced[noise_reduced < eps] = eps
# create new spectrogram object with noise-reduced values
# add small epsilon to avoid issues if we later take the log10
noise_reduced_spec = s._spawn(spectrogram=noise_reduced + eps)
return noise_reduced_spec
path = "example.wav"
offset = 0 # seconds to start of clip from beginning of audio file
duration = 3 # seconds
bandpass_range = [3000, 5000] # Hz, frequency range of target sound to measure
audio = Audio.from_file(path, offset, duration)
noise_reduced_spec = calculate_calibrated_spec(
audio,
bandpass_range,
background_noise_pct=25,
)
# use frequency response curves to calibrate
# assues you have a csv with columns: `freq` (in Hz) and `spectrum` (in dB) giving the
# offset to convert from dBFS to dB SPL
fr = pd.read_csv("frequency_response_magnitude_scaling.csv")
# interpolate measured calibration curve values onto the frequencies of our spectrogram
calibration_interpolated = np.interp(
noise_reduced_spec.frequencies, fr.freq, fr.spectrum
)
# convert dB to linear coefficients
calibration_interpolated_linear = np.exp(10, calibration_interpolated / 20)
noise_reduced_logspec = noise_reduced_spec._spawn(
spectrogram=20 * np.log10(noise_reduced_spec.spectrogram)
)
# apply calibration offsets per frequency
s_calibrated = noise_reduced_logspec._spawn(
spectrogram=noise_reduced_logspec.spectrogram
* calibration_interpolated_linear[:, np.newaxis]
)
# (sum instead of multilply if using log values)
# convert spec to rms time series using librosa
rms_time_series = librosa.feature.rms(
S=s_calibrated.spectrogram * spec_params["window_samples"],
frame_length=spec_params["window_samples"],
hop_length=spec_params["hop_size"],
)
# extract max dBFS
max_dBFS = 20 * np.log10(rms_time_series.max() * np.sqrt(2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment