Last active
October 29, 2023 16:44
-
-
Save rsokl/a28f5dfc413f79125345edaa8fd60733 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def local_peaks(
    log_spectrogram: np.ndarray, amp_min: float, p_nn: int
) -> List[Tuple[int, int]]:
    """
    Find the local peaks of a spectrogram that exceed an amplitude threshold.

    A local neighborhood is defined around every spectrogram cell; a cell is
    reported as a peak when it is the maximum of its neighborhood and its
    value is larger than the specified `amp_min`.

    Parameters
    ----------
    log_spectrogram : numpy.ndarray, shape=(n_freq, n_time)
        Log-scaled spectrogram. Columns are the periodograms of
        successive segments of a frequency-time spectrum.

    amp_min : float
        Amplitude threshold applied to local maxima.

    p_nn : int
        The neighborhood radius used for determining if a spectrogram value
        is a local peak. Specified in spectrogram cells.

    Returns
    -------
    List[Tuple[int, int]]
        ``(time-bin, frequency-bin)`` index pairs of the local peaks in the
        spectrogram. Sorted by ascending time bin and, within a given time
        bin, by ascending frequency bin.

    Notes
    -----
    The local peaks are returned in column-major order for the spectrogram.
    That is, the peaks are ordered by time: all peaks found at one time bin
    are listed (by increasing frequency) before moving on to the next time
    bin.

    This is an interface stub: the body is intentionally left as a
    placeholder and must be supplied by the implementer.
    """
    ...
def plot_song(
    song: Union[str, Path, _np.ndarray],
    *,
    sampling_rate: int = _defaults.SAMPLING_RATE,
    min_frac_amp_cutoff: float = _defaults.MIN_FRAC_AMP_CUTOFF,
    local_peak_nn_radius: int = _defaults.LOCAL_PEAK_NN_RADIUS,
) -> Tuple[Figure, Axes]:
    """Plot a spectrogram and fingerprint features for a song.

    The spectrogram is drawn by `digital_to_spec`, and the local peaks found
    by `local_peaks` are overlaid on it as white markers.

    Parameters
    ----------
    song : Union[str, pathlib.Path, numpy.ndarray]
        The filepath to a song-file, or the digital signal itself.

    sampling_rate : int, optional (default=_defaults.SAMPLING_RATE)
        The target sampling rate used to read in an audio file. Only used
        when `song` is a filepath; when `song` is an array the rate is taken
        from ``microphone.config.settings.rate`` instead.

    min_frac_amp_cutoff : float, optional (default=_defaults.MIN_FRAC_AMP_CUTOFF)
        The fractional portion of intensities for which the cutoff is selected.
        E.g. ``min_frac_amp_cutoff=0.8`` will produce a cutoff intensity such
        that the bottom 80% of intensities are excluded.

    local_peak_nn_radius : int, optional (default=_defaults.LOCAL_PEAK_NN_RADIUS)
        The neighborhood radius used for determining if a spectrogram value
        is a local peak. Specified in spectrogram cells.

    Returns
    -------
    Tuple[matplotlib.pyplot.Figure, matplotlib.pyplot.Axes]
        The figure and axes holding the spectrogram. Peak markers are added
        only when at least one local peak was found.

    Raises
    ------
    TypeError
        If `song` is neither a path (str / pathlib.Path) nor a numpy array.
    """
    from pathlib import Path

    if isinstance(song, (str, Path)):
        digital, fs = _librosa.load(str(song), sr=sampling_rate, mono=True)
    elif isinstance(song, _np.ndarray):
        # Imported lazily: the microphone settings are only needed to learn
        # the sampling rate of an in-memory signal, so plotting from a file
        # does not require the `microphone` package to be importable.
        from microphone.config import settings

        digital = song
        fs = settings.rate
    else:
        raise TypeError("`song` must be a path to a song or an audio signal array")

    # get the spectrogram, along with the size of the
    # frequency bins (`df`) and time bins (`dt`)
    S, cut, fig, ax, df, dt = digital_to_spec(
        digital, fs, frac_cut=min_frac_amp_cutoff, plot=True
    )

    # Find the positions of the local peaks in the spectrogram.
    # The locations returned here are column/row indices.
    peaks = local_peaks(S, cut, p_nn=local_peak_nn_radius)

    # `zip(*peaks)` yields nothing for an empty peak list, which would make
    # the two-name unpacking below raise ValueError; with no peaks there is
    # simply nothing to overlay.
    if peaks:
        t_loc, f_loc = zip(*peaks)

        # We need to scale the time-indices by dt and the frequency-indices
        # by df so that the locations of the local peaks are in the right
        # place on the spectrogram.
        # NOTE(review): the +1 offset on the time index presumably matches how
        # `digital_to_spec` positions its time bins - confirm against it.
        times = dt * (_np.array(tuple(t_loc)) + 1)
        freqs = df * (_np.array(tuple(f_loc)) + 0.5)  # add 0.5 so peaks are in the middle of the bins
        ax.scatter(times, freqs, s=4, color="white")

    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Frequency (Hz)")
    return fig, ax
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment