sammlapp · August 5, 2024 04:31
diff --git a/measure_volume.py b/measure_volume.py
 """this script extracts a measurement of sound pressure level from an audio file

 specifically, it 
 (1) calculates a spectrogram
 (2) subtracts a spectral noise profile based on the Nth percentile value of each spectrogram row
 (3) calibrates the spectrogram to sound pressure level using a calibration curve from a csv file
 (4) reconstructs a time series of rms values from the spectrogram
 (5) extracts the maximum value of rms and converts to dB
 """

 from opensoundscape import Audio, Spectrogram
 import scipy
 import numpy as np
 import librosa

 spec_params = {"window_samples": 512, "hop_size": 256}


 def calculate_calibrated_spec(
    audio,
    bandpass_range,
    background_noise_pct=25,
 ):
    """

    Args:
        audio: opensoundscape.Audio object
        bandpass_range: [low_f, high_f] to bandpass audio
        calibration_curve: dictionary of {freq:dB} to convert dBFS to dB SPL re 20 uPa for
            the digital recording; values are linearly interpolated onto the spectrogram's
            frequency values
        background_noise_pct: percentile of pixel to use as background noise estimate
            - background noise spectrum is computed row-wise on the spectrogram

    Returns:
        max value
    """
    # generate spectrogram
    # these settings provide a specrogram that can be summed back to the original amplitude
    # note the use of a rectangular window, 'magnitude' mode, and 'spectrum' scaling
    f, t, scipy_spec = scipy.signal.spectrogram(
        x=audio.samples,
        fs=audio.sample_rate,
        mode="magnitude",
        scaling="spectrum",
        nperseg=spec_params["window_samples"],
        nfft=spec_params["window_samples"],
        noverlap=spec_params["hop_size"],
        window=np.ones(spec_params["window_samples"]),
        return_onesided=True,
    )

    # bandpass the spectrogram to the desired frequency range (replace values outside bandpass range with 0)
    for i, fi in enumerate(f):
        if fi < bandpass_range[0] or fi > bandpass_range[1]:
            scipy_spec[i, :] = 0

    # make opensoundscape Spectrogram object
    s = Spectrogram(
        scipy_spec,
        f,
        t,
        window_samples=spec_params["window_samples"],
        overlap_samples=spec_params["window_samples"] - spec_params["hop_size"],
        audio_sample_rate=audio.sample_rate,
        scaling="spectrum",
    )

    # estimate the background noise as the [`background_noise_pct`]th percentile of each row of the spectrogram
    noise_spectrum = np.percentile(s.spectrogram, background_noise_pct, axis=1)

    # row-wise subtraction of noise from spectrogram
    noise_reduced = s.spectrogram - noise_spectrum[:, np.newaxis]
    eps = 1e-12
    noise_reduced[noise_reduced < eps] = eps

    # create new spectrogram object with noise-reduced values
    # add small epsilon to avoid issues if we later take the log10
    noise_reduced_spec = s._spawn(spectrogram=noise_reduced + eps)

    return noise_reduced_spec


 path = "example.wav"
 offset = 0  # seconds to start of clip from beginning of audio file
 duration = 3  # seconds
 bandpass_range = [3000, 5000]  # Hz, frequency range of target sound to measure

 audio = Audio.from_file(path, offset, duration)
 noise_reduced_spec = calculate_calibrated_spec(
    audio,
    bandpass_range,
    background_noise_pct=25,
 )

 # use frequency response curves to calibrate
 # assues you have a csv with columns: `freq` (in Hz) and `spectrum` (in dB) giving the
 # offset to convert from dBFS to dB SPL
 fr = pd.read_csv("frequency_response_magnitude_scaling.csv")
 # interpolate measured calibration curve values onto the frequencies of our spectrogram
 calibration_interpolated = np.interp(
    noise_reduced_spec.frequencies, fr.freq, fr.spectrum
 )
 # convert dB to linear coefficients
 calibration_interpolated_linear = np.exp(10, calibration_interpolated / 20)
 noise_reduced_logspec = noise_reduced_spec._spawn(
    spectrogram=20 * np.log10(noise_reduced_spec.spectrogram)
 )
 # apply calibration offsets per frequency
 s_calibrated = noise_reduced_logspec._spawn(
    spectrogram=noise_reduced_logspec.spectrogram
    * calibration_interpolated_linear[:, np.newaxis]
 )
 # (sum instead of multilply if using log values)

 # convert spec to rms time series using librosa
 rms_time_series = librosa.feature.rms(
    S=s_calibrated.spectrogram * spec_params["window_samples"],
    frame_length=spec_params["window_samples"],
    hop_length=spec_params["hop_size"],
 )

 # extract max dBFS
 max_dBFS = 20 * np.log10(rms_time_series.max() * np.sqrt(2))
	"""this script extracts a measurement of sound pressure level from an audio file

	specifically, it
	(1) calculates a spectrogram
	(2) subtracts a spectral noise profile based on the Nth percentile value of each spectrogram row
	(3) calibrates the spectrogram to sound pressure level using a calibration curve from a csv file
	(4) reconstructs a time series of rms values from the spectrogram
	(5) extracts the maximum value of rms and converts to dB
	"""

	from opensoundscape import Audio, Spectrogram
	import scipy
	import numpy as np
	import librosa

	spec_params = {"window_samples": 512, "hop_size": 256}


	def calculate_calibrated_spec(
	audio,
	bandpass_range,
	background_noise_pct=25,
	):
	"""

	Args:
	audio: opensoundscape.Audio object
	bandpass_range: [low_f, high_f] to bandpass audio
	calibration_curve: dictionary of {freq:dB} to convert dBFS to dB SPL re 20 uPa for
	the digital recording; values are linearly interpolated onto the spectrogram's
	frequency values
	background_noise_pct: percentile of pixel to use as background noise estimate
	- background noise spectrum is computed row-wise on the spectrogram

	Returns:
	max value
	"""
	# generate spectrogram
	# these settings provide a specrogram that can be summed back to the original amplitude
	# note the use of a rectangular window, 'magnitude' mode, and 'spectrum' scaling
	f, t, scipy_spec = scipy.signal.spectrogram(
	x=audio.samples,
	fs=audio.sample_rate,
	mode="magnitude",
	scaling="spectrum",
	nperseg=spec_params["window_samples"],
	nfft=spec_params["window_samples"],
	noverlap=spec_params["hop_size"],
	window=np.ones(spec_params["window_samples"]),
	return_onesided=True,
	)

	# bandpass the spectrogram to the desired frequency range (replace values outside bandpass range with 0)
	for i, fi in enumerate(f):
	if fi < bandpass_range[0] or fi > bandpass_range[1]:
	scipy_spec[i, :] = 0

	# make opensoundscape Spectrogram object
	s = Spectrogram(
	scipy_spec,
	f,
	t,
	window_samples=spec_params["window_samples"],
	overlap_samples=spec_params["window_samples"] - spec_params["hop_size"],
	audio_sample_rate=audio.sample_rate,
	scaling="spectrum",
	)

	# estimate the background noise as the [`background_noise_pct`]th percentile of each row of the spectrogram
	noise_spectrum = np.percentile(s.spectrogram, background_noise_pct, axis=1)

	# row-wise subtraction of noise from spectrogram
	noise_reduced = s.spectrogram - noise_spectrum[:, np.newaxis]
	eps = 1e-12
	noise_reduced[noise_reduced < eps] = eps

	# create new spectrogram object with noise-reduced values
	# add small epsilon to avoid issues if we later take the log10
	noise_reduced_spec = s._spawn(spectrogram=noise_reduced + eps)

	return noise_reduced_spec


	path = "example.wav"
	offset = 0 # seconds to start of clip from beginning of audio file
	duration = 3 # seconds
	bandpass_range = [3000, 5000] # Hz, frequency range of target sound to measure

	audio = Audio.from_file(path, offset, duration)
	noise_reduced_spec = calculate_calibrated_spec(
	audio,
	bandpass_range,
	background_noise_pct=25,
	)

	# use frequency response curves to calibrate
	# assues you have a csv with columns: `freq` (in Hz) and `spectrum` (in dB) giving the
	# offset to convert from dBFS to dB SPL
	fr = pd.read_csv("frequency_response_magnitude_scaling.csv")
	# interpolate measured calibration curve values onto the frequencies of our spectrogram
	calibration_interpolated = np.interp(
	noise_reduced_spec.frequencies, fr.freq, fr.spectrum
	)
	# convert dB to linear coefficients
	calibration_interpolated_linear = np.exp(10, calibration_interpolated / 20)
	noise_reduced_logspec = noise_reduced_spec._spawn(
	spectrogram=20 * np.log10(noise_reduced_spec.spectrogram)
	)
	# apply calibration offsets per frequency
	s_calibrated = noise_reduced_logspec._spawn(
	spectrogram=noise_reduced_logspec.spectrogram
	* calibration_interpolated_linear[:, np.newaxis]
	)
	# (sum instead of multilply if using log values)

	# convert spec to rms time series using librosa
	rms_time_series = librosa.feature.rms(
	S=s_calibrated.spectrogram * spec_params["window_samples"],
	frame_length=spec_params["window_samples"],
	hop_length=spec_params["hop_size"],
	)

	# extract max dBFS
	max_dBFS = 20 * np.log10(rms_time_series.max() * np.sqrt(2))