Skip to content

Instantly share code, notes, and snippets.

@p-i-
Created October 18, 2024 10:09
Show Gist options
  • Save p-i-/598da13d2a1a1e2a6ec978e15fa7d892 to your computer and use it in GitHub Desktop.
Save p-i-/598da13d2a1a1e2a6ec978e15fa7d892 to your computer and use it in GitHub Desktop.
WebRTC VAD (c++)

1. original fails hard.

2. Rework works!

Just clone it and run `pip install -e .` inside the cloned directory.

3. CMake build

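Place CMakeLists.txt and hello.cpp alongside cbits/. Put test-audio.raw in that same directory too: the program is run from build/ and opens ../test-audio.raw.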

mkdir build;  cd build
cmake ..  &&  make  &&  ./test_vad

Output:

pi@πlocal ~/code/2024/kit/test_projects/vad/WebRTC_vad/build main
> ./test_vad 
Voice detected in frame 1
Voice detected in frame 2
Voice detected in frame 3
Voice detected in frame 4
Voice detected in frame 5
Voice detected in frame 6
Voice detected in frame 7
Voice detected in frame 8
Voice detected in frame 9
Voice detected in frame 10
Voice detected in frame 11
No voice detected in frame 12
No voice detected in frame 13
No voice detected in frame 14
No voice detected in frame 15
No voice detected in frame 16
...
Voice detected in frame 83
Voice detected in frame 84
No voice detected in frame 85
No voice detected in frame 86
No voice detected in frame 87
No voice detected in frame 88
No voice detected in frame 89
No voice detected in frame 90
cmake_minimum_required(VERSION 3.10)

# Require C++20 for the C++ translation units (hello.cpp).
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Project name and version.
project(webrtcvad VERSION 2.0.14)

# Collect the bundled WebRTC C sources (pywebrtcvad.c is not matched by
# these patterns) together with the hello.cpp test driver.
file(GLOB VAD_SOURCES
    cbits/webrtc/common_audio/signal_processing/*.c
    cbits/webrtc/common_audio/vad/*.c
    hello.cpp # Add our new hello.cpp test file
)

# Build everything into a single test executable.
add_executable(test_vad ${VAD_SOURCES})

# Headers are included relative to cbits/ (e.g. "webrtc/common_audio/...").
target_include_directories(test_vad PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cbits)

# Platform selection macro passed to every source of the target.
if(WIN32)
    target_compile_definitions(test_vad PRIVATE _WIN32)
else()
    target_compile_definitions(test_vad PRIVATE WEBRTC_POSIX)
endif()

# Installation is intentionally left disabled.
# install(TARGETS test_vad DESTINATION bin)
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
/// Loads a headerless (raw) PCM file as signed 16-bit samples.
///
/// The file is read as bytes and every consecutive pair of bytes is decoded
/// as one little-endian 16-bit sample (low byte first). Decoding explicitly,
/// rather than constructing the sample vector straight from a byte iterator,
/// matters: a byte-iterator range would yield one "sample" per byte, doubling
/// the sample count and corrupting every value.
///
/// NOTE(review): little-endian byte order is assumed for the input file —
/// confirm against how test-audio.raw was produced.
///
/// @param filename Path of the raw audio file to read.
/// @return The decoded samples. Empty if the file cannot be opened (an error
///         is printed to stderr) or holds fewer than two bytes. A trailing
///         odd byte, if any, is ignored.
std::vector<int16_t> loadAudio(const char* filename) {
    std::ifstream file(filename, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open audio file!" << std::endl;
        return {};
    }

    // Slurp the whole file as raw bytes first.
    const std::vector<char> bytes((std::istreambuf_iterator<char>(file)),
                                  std::istreambuf_iterator<char>());

    std::vector<int16_t> audioData;
    audioData.reserve(bytes.size() / 2);

    // Combine each low/high byte pair into one sample. Going through
    // unsigned char avoids sign-extension of bytes >= 0x80.
    for (std::size_t i = 0; i + 1 < bytes.size(); i += 2) {
        const unsigned int lo = static_cast<unsigned char>(bytes[i]);
        const unsigned int hi = static_cast<unsigned char>(bytes[i + 1]);
        const auto raw = static_cast<uint16_t>(lo | (hi << 8));
        audioData.push_back(static_cast<int16_t>(raw));
    }
    return audioData;
}
int main() {
VadInst* vad = nullptr;
// Create and initialize the VAD instance
if (WebRtcVad_Create(&vad) != 0 || vad == nullptr) {
std::cerr << "Failed to create VAD instance!" << std::endl;
return 1;
}
if (WebRtcVad_Init(vad) != 0) {
std::cerr << "Failed to initialize VAD!" << std::endl;
WebRtcVad_Free(vad);
return 1;
}
// Set VAD aggressiveness mode (0 to 3, where 3 is most aggressive)
if (WebRtcVad_set_mode(vad, 1) != 0) {
std::cerr << "Failed to set VAD mode!" << std::endl;
WebRtcVad_Free(vad);
return 1;
}
// Load raw audio file
const char* audioFile = "../test-audio.raw";
std::vector<int16_t> audioData = loadAudio(audioFile);
if (audioData.empty()) {
std::cerr << "Audio data is empty or failed to load!" << std::endl;
WebRtcVad_Free(vad);
return 1;
}
// Assume the sample rate is 16000 Hz and frame length is 160 samples (10 ms frames)
int sample_rate = 16000;
int frame_length = sample_rate / 100; // 10 ms frame
int num_frames = audioData.size() / frame_length;
// Process each frame
for (int i = 0; i < num_frames; ++i) {
const int16_t* frame = &audioData[i * frame_length];
int result = WebRtcVad_Process(vad, sample_rate, frame, frame_length);
if (result == 1) {
std::cout << "Voice detected in frame " << i + 1 << std::endl;
} else if (result == 0) {
std::cout << "No voice detected in frame " << i + 1 << std::endl;
} else {
std::cerr << "Error processing frame " << i + 1 << std::endl;
}
}
// Clean up
WebRtcVad_Free(vad);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment