If you want to convert a WAV file to 16 kHz for Whisper in Rust, you can use the following script. It uses the `hound`
crate to read and write WAV files and the `rubato` crate to resample the audio.
It reads `input.wav` from the current directory
and writes the converted audio to `output.wav`.
use rubato::{
Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction,
};
/// Resamples a single channel of audio from `from_rate` Hz to 16 kHz.
///
/// `audio` holds the samples of one channel; the whole slice is handed to the
/// resampler as a single chunk. An empty input yields an empty output.
///
/// # Panics
///
/// Panics if the resampler cannot be constructed (for example when `from_rate`
/// does not give a usable ratio) or if processing the chunk fails.
fn resample(audio: &[f32], from_rate: f64) -> Vec<f32> {
    // Nothing to convert; also avoids asking the resampler for a zero-length chunk.
    if audio.is_empty() {
        return Vec::new();
    }

    let parameters = SincInterpolationParameters {
        sinc_len: 256,
        f_cutoff: 0.95,
        interpolation: SincInterpolationType::Linear,
        oversampling_factor: 256,
        window: WindowFunction::BlackmanHarris2,
    };

    let mut resampler = SincFixedIn::<f32>::new(
        // Ratio of output rate to input rate.
        16000_f64 / from_rate,
        // Maximum relative ratio adjustment; the ratio is never changed here.
        10.0,
        parameters,
        // Chunk size: the complete input is processed in one call.
        audio.len(),
        // Number of channels.
        1,
    )
    .expect("failed to construct the sinc resampler");

    let resampled = resampler
        .process(&[audio], None)
        .expect("failed to resample the audio chunk");

    // Move the only channel out of the result instead of cloning it.
    resampled
        .into_iter()
        .next()
        .expect("resampler was configured with one channel but returned none")
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = hound::WavReader::open("input.wav")?;
let spec = reader.spec();
let duration = reader.duration() as f32 / spec.sample_rate as f32;
let audio_data = reader
.samples::<f32>()
.map(|s| s.unwrap())
.collect::<Vec<f32>>();
println!("Wav file Duration: {} s at {} Hz", duration, spec.sample_rate);
let data = resample(&audio_data, spec.sample_rate as f64);
// write resampled audio to disk as output.wav
let mut writer = hound::WavWriter::create(
"output.wav",
hound::WavSpec {
channels: 1,
sample_rate: 16000,
bits_per_sample: 16,
sample_format: hound::SampleFormat::Int,
},
)?;
for &sample in &data {
writer.write_sample((sample * i16::MAX as f32) as i16)?;
}
Ok(())
}
This code is based on https://github.com/johnsnewby/terplounge and has been extended to read from and write to WAV files.