Last active
January 9, 2023 05:37
-
-
Save sourceperl/c1444a99992a58a0ba6987beaa2daec0 to your computer and use it in GitHub Desktop.
Display sound spectral view with scipy FFT and matplotlib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# detect tones in sound spectrum with scipy FFT | |
# here sound source is a USB microphone with ALSA (channel 1) | |
from collections import deque | |
import struct | |
import sys | |
import time | |
import threading | |
import alsaaudio | |
import numpy as np | |
from scipy.fftpack import fft | |
# some const | |
# 44100 Hz sampling rate (for 0-22050 Hz view, 0.0227ms/sample) | |
SAMPLE_FREQ = 44100 | |
# 20000 samples buffer size (454 ms) | |
NB_SAMPLE = 20000 | |
class Sampler(threading.Thread):
    """Background thread that keeps the most recent audio samples in a FIFO.

    The ALSA capture device is configured as mono, SAMPLE_FREQ Hz, signed
    16 bit little endian. Every sample is stored as a float (raw value
    divided by 32767) in a FIFO that always holds exactly NB_SAMPLE values.
    """

    def __init__(self, cardindex=-1):
        """Open and configure the capture device and create the sample FIFO.

        :param cardindex: card index handed over to alsaaudio.PCM
        """
        # init thread (daemon thread: it never blocks interpreter exit)
        super().__init__()
        self.daemon = True
        # init ALSA audio
        self.inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, cardindex=cardindex)
        # set attributes: Mono, frequency, 16 bit little endian samples
        self.inp.setchannels(1)
        self.inp.setrate(SAMPLE_FREQ)
        self.inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        self.inp.setperiodsize(2048)
        # sample FIFO, pre-filled with zeros so its length is always NB_SAMPLE
        self._s_lock = threading.Lock()
        self._s_fifo = deque([0] * NB_SAMPLE, maxlen=NB_SAMPLE)

    @staticmethod
    def _decode(data, nb_frames):
        """Convert raw S16_LE mono bytes to a list of floats (raw / 32767).

        :param data: bytes returned by the capture device
        :param nb_frames: number of 16 bit samples expected in data
        :return: list of nb_frames floats
        :raises struct.error: if data does not hold exactly nb_frames samples
        """
        # '<' forces little endian decoding so it matches PCM_FORMAT_S16_LE
        # on every host (a bare 'h' follows the native byte order)
        raw_smp_l = struct.unpack('<%dh' % nb_frames, data)
        return [raw_smp / 32767 for raw_smp in raw_smp_l]

    def get_sample(self):
        """Return a snapshot (list copy) of the FIFO content."""
        with self._s_lock:
            return list(self._s_fifo)

    def run(self):
        """Thread body: read the device forever and feed the FIFO."""
        while True:
            # read data from device
            nb_frames, data = self.inp.read()
            if nb_frames > 0:
                # decode before taking the lock to keep the critical section short
                smp_l = self._decode(data, nb_frames)
                with self._s_lock:
                    self._s_fifo.extend(smp_l)
            else:
                print('sampler error occur (l=%s and len data=%s)' % (nb_frames, len(data)), file=sys.stderr)
if __name__ == '__main__':
    # Tone detector: every 0.5 s, compute the spectrum of the sample buffer,
    # print the strongest frequency and the levels at 330 Hz and 440 Hz.

    # start sound sampler thread for sound card 1 (USB microphone)
    spr = Sampler(cardindex=1)
    spr.start()
    # only the first half of the FFT output is used (0 Hz up to SAMPLE_FREQ/2);
    # it must be an int to be usable as a slice bound
    half = NB_SAMPLE // 2
    # main loop
    while True:
        # read samples
        samples = spr.get_sample()
        # compute FFT
        y_freq = fft(samples)
        # level axis at each frequency, expressed in percent:
        # magnitude divided by half the buffer size, times 100
        y_level = 1.0 / half * np.abs(y_freq[0:half]) * 100
        # find higher level frequency (bin k sits at k * SAMPLE_FREQ / NB_SAMPLE Hz)
        index_max = np.argmax(y_level)
        max_level = y_level[index_max]
        max_freq = index_max * SAMPLE_FREQ / NB_SAMPLE
        print('max f=%i Hz (lvl=%.02f %%)' % (max_freq, max_level))
        # detect 330 Hz and 440 Hz tones (nearest FFT bin)
        level_330hz = y_level[round(330 * NB_SAMPLE / SAMPLE_FREQ)]
        level_440hz = y_level[round(440 * NB_SAMPLE / SAMPLE_FREQ)]
        print('level at 330 Hz %.02f %%' % level_330hz)
        print('level at 440 Hz %.02f %%' % level_440hz)
        if (327 < max_freq < 333) and (1.5 < max_level < 3.0):
            print('%s: TONE DETECT (level=%.02f %%)' % (
                time.strftime("%d/%m/%Y %H:%M:%S", time.localtime()), max_level))
        time.sleep(0.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# text-mode GUI to view or change sound card parameters (press <F6> to select the USB sound card, <F4> to select the Micro/Capture view)
alsamixer | |
# command line to set the Mic gain to 16 dB on sound card 1:
amixer -c1 set Mic 16dB | |
# save all sound parameters set with alsamixer or amixer:
sudo alsactl store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Install the system and Python packages used by the sound spectrum scripts.
# Must be run as root.

# stop at the first failing command instead of carrying on with a partial install
set -e

# vars
NAME=$(basename "$0")

# check root
if [ "$EUID" -ne 0 ]; then
    # the script name goes through %s so it is never interpreted as a printf format
    printf 'ERROR: %s needs to be run by root\n' "$NAME" 1>&2
    exit 1
fi

# install alsa audio python module
apt-get install libasound2-dev python3-pip
pip3 install pyalsaaudio

# install matplotlib and scipy
apt-get install python3-matplotlib python3-scipy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# display sound spectral view with scipy FFT and matplotlib | |
# here sound source is the system microphone with ALSA (channel 1) | |
from collections import deque | |
import struct | |
import sys | |
import threading | |
import alsaaudio | |
import matplotlib.pyplot as plt | |
import matplotlib.animation as animation | |
import numpy as np | |
from scipy.fftpack import fft | |
# some const | |
# 44100 Hz sampling rate (for 0-22050 Hz view, 0.0227ms/sample) | |
SAMPLE_FREQ = 44100 | |
# 66000 samples buffer size (near 1.5 second) | |
NB_SAMPLE = 66000 | |
class Sampler(threading.Thread):
    """Background thread that keeps the most recent audio samples in a FIFO.

    The ALSA capture device is configured as mono, SAMPLE_FREQ Hz, signed
    16 bit little endian. Every sample is stored as a float (raw value
    divided by 32767) in a FIFO that always holds exactly NB_SAMPLE values.
    """

    def __init__(self, cardindex=-1):
        """Open and configure the capture device and create the sample FIFO.

        :param cardindex: card index handed over to alsaaudio.PCM
        """
        # init thread (daemon thread: it never blocks interpreter exit)
        super().__init__()
        self.daemon = True
        # init ALSA audio
        self.inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, cardindex=cardindex)
        # set attributes: Mono, frequency, 16 bit little endian samples
        self.inp.setchannels(1)
        self.inp.setrate(SAMPLE_FREQ)
        self.inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        self.inp.setperiodsize(512)
        # sample FIFO, pre-filled with zeros so its length is always NB_SAMPLE
        self._s_lock = threading.Lock()
        self._s_fifo = deque([0] * NB_SAMPLE, maxlen=NB_SAMPLE)

    @staticmethod
    def _decode(data, nb_frames):
        """Convert raw S16_LE mono bytes to a list of floats (raw / 32767).

        :param data: bytes returned by the capture device
        :param nb_frames: number of 16 bit samples expected in data
        :return: list of nb_frames floats
        :raises struct.error: if data does not hold exactly nb_frames samples
        """
        # '<' forces little endian decoding so it matches PCM_FORMAT_S16_LE
        # on every host (a bare 'h' follows the native byte order)
        raw_smp_l = struct.unpack('<%dh' % nb_frames, data)
        return [raw_smp / 32767 for raw_smp in raw_smp_l]

    def get_sample(self):
        """Return a snapshot (list copy) of the FIFO content."""
        with self._s_lock:
            return list(self._s_fifo)

    def run(self):
        """Thread body: read the device forever and feed the FIFO."""
        while True:
            # read data from device
            nb_frames, data = self.inp.read()
            if nb_frames > 0:
                # decode before taking the lock to keep the critical section short
                smp_l = self._decode(data, nb_frames)
                with self._s_lock:
                    self._s_fifo.extend(smp_l)
            else:
                print('sampler error occur (l=%s and len data=%s)' % (nb_frames, len(data)), file=sys.stderr)
def plot_anim(i):
    """Redraw the spectrum of the current sample buffer (animation callback).

    Reads the module-level sampler ``spr`` and draws on the module-level
    axes ``ax1``; also prints the frequency with the highest level.

    :param i: frame argument supplied by FuncAnimation (unused)
    """
    # only the first half of the FFT output is plotted (0 Hz up to SAMPLE_FREQ/2);
    # it must be an int to be usable as a slice bound and as a sample count
    half = NB_SAMPLE // 2
    # read samples
    samples = spr.get_sample()
    # compute FFT
    y_freq = fft(samples)
    # frequency axis in Hz:
    # FFT bin k sits at k * SAMPLE_FREQ / NB_SAMPLE, so the axis starts at 0.0
    # and stops one step before SAMPLE_FREQ / 2 (endpoint excluded)
    x_freq = np.linspace(0.0, SAMPLE_FREQ / 2, half, endpoint=False)
    # level axis at each frequency, expressed in percent:
    # magnitude divided by half the buffer size, times 100
    y_level = 1.0 / half * np.abs(y_freq[0:half]) * 100
    # wipe and redraw
    ax1.clear()
    # set_facecolor replaces set_axis_bgcolor, which no longer exists in Matplotlib
    ax1.set_facecolor('grey')
    ax1.set_xlabel('Frequency (Hz)')
    ax1.set_ylabel('Level (%)')
    ax1.plot(x_freq, y_level, 'b', lw=2)
    # find higher level frequency
    index_max = np.argmax(y_level)
    freq_max = x_freq[index_max]
    print('max level at f=%i Hz (lvl=%.02f %%)' % (freq_max, y_level[index_max]))
    #print('sample: max %.04f, min %.04f' % (max(samples), min(samples)))
if __name__ == '__main__':
    # Live spectrum viewer: a sampler thread fills the buffer while
    # matplotlib redraws the spectrum once per second.

    # start sound sampler thread
    spr = Sampler()
    spr.start()
    # init a dynamic plotter
    fig, ax1 = plt.subplots(nrows=1, ncols=1)
    # the window title is set through the canvas manager
    # (FigureCanvas.set_window_title no longer exists in recent Matplotlib);
    # the manager can be None when the figure has no window
    if fig.canvas.manager is not None:
        fig.canvas.manager.set_window_title('Sound spectral view')
    # keep a reference to the animation, otherwise it may be garbage collected
    ani = animation.FuncAnimation(fig, plot_anim, interval=1000)
    plt.show()
Thanks, it's fixed.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In setup.sh, I think pithon3-scipy should be python3-scipy.
...