Last active
February 24, 2025 22:22
-
-
Save notalentgeek/48aeab398b6b74e3a9134a61b6b79a36 to your computer and use it in GitHub Desktop.
A simple proof of concept that extracts the pitch and volume of audio streamed from a microphone with PyAudio.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a simple demonstration of how to stream
# audio from a microphone and extract its pitch
# and volume directly with the help of the PyAudio
# and Aubio Python libraries. PyAudio is used to
# interface with the computer's microphone, while
# Aubio provides the pitch detection object. NumPy
# is used to convert the data that PyAudio returns
# into the format that Aubio expects.
import aubio | |
import numpy as num | |
import pyaudio | |
import sys | |
# Settings shared by the PyAudio microphone stream and the Aubio
# pitch detector.

# Capture format requested from PyAudio.
SAMPLE_RATE = 44100
CHANNELS = 1
FORMAT = pyaudio.paFloat32

# Pitch detection method name handed to aubio.pitch().
METHOD = "default"

# Sizes handed to aubio.pitch(); the hop is half of the buffer.
BUFFER_SIZE = 2048
HOP_SIZE = BUFFER_SIZE // 2

# Number of frames read from the microphone per iteration; kept
# equal to the hop size so each read feeds exactly one detector call.
PERIOD_SIZE_IN_FRAME = HOP_SIZE
def main(args):
    """Stream microphone audio and print each period's pitch and volume.

    Opens a PyAudio input stream, feeds every captured period to an
    Aubio pitch detector and prints one line per period in the form
    ``<pitch> <volume>``. The loop runs until it is interrupted with
    Ctrl+C, after which the stream and the PyAudio session are released.

    Args:
        args: Command-line arguments (``sys.argv``); currently unused.
    """
    audio = pyaudio.PyAudio()
    try:
        # Open the microphone stream.
        mic = audio.open(format=FORMAT, channels=CHANNELS,
                         rate=SAMPLE_RATE, input=True,
                         frames_per_buffer=PERIOD_SIZE_IN_FRAME)
        try:
            # Set up Aubio's pitch detection object, reporting in Hz.
            detector = aubio.pitch(METHOD, BUFFER_SIZE,
                                   HOP_SIZE, SAMPLE_RATE)
            detector.set_unit("Hz")
            # Anything below -40 dB is treated as silence.
            detector.set_silence(-40)

            while True:
                # Block until one period of audio is available.
                data = mic.read(PERIOD_SIZE_IN_FRAME)
                # Reinterpret the raw bytes as samples for Aubio.
                # frombuffer replaces the deprecated binary mode of
                # fromstring and avoids copying the data.
                samples = num.frombuffer(data, dtype=aubio.float_type)
                pitch = detector(samples)[0]
                # Energy (mean of squares) of the current period.
                volume = num.sum(samples**2)/len(samples)
                # Show exactly six digits after the decimal point.
                volume = "{:.6f}".format(volume)
                print(str(pitch) + " " + str(volume))
        except KeyboardInterrupt:
            # Ctrl+C is the intended way to stop the infinite loop.
            pass
        finally:
            mic.stop_stream()
            mic.close()
    finally:
        # Release PortAudio even if opening the stream failed.
        audio.terminate()
# Start the demo only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment