Skip to content

Instantly share code, notes, and snippets.

@kevinmoran
Last active April 7, 2025 15:13
Show Gist options
  • Save kevinmoran/3d05e190fb4e7f27c1043a3ba321cede to your computer and use it in GitHub Desktop.
Save kevinmoran/3d05e190fb4e7f27c1043a3ba321cede to your computer and use it in GitHub Desktop.
// Simple example code to load a Wav file and play it with WASAPI
// This is NOT complete Wav loading code. It is a barebones example
// that makes a lot of assumptions, see the assert() calls for details
//
// References:
// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
// Handmade Hero Day 138: Loading WAV Files
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <assert.h>
#include <stdint.h>
// Overlay for the start of a loaded WAV file: the RIFF header, the fmt chunk
// and the data chunk header back to back, with the sample data following.
// NB: Only matches WAV files holding PCM (non-compressed) data; other formats
// carry the extra fmt fields listed below, which changes the layout.
#pragma warning(disable : 4200) // silence the warning raised by the unsized samples[] member
struct WavFile {
    // ---- RIFF chunk ----
    uint32_t riffId;
    uint32_t riffChunkSize;
    uint32_t waveId;

    // ---- fmt chunk ----
    uint32_t fmtId;
    uint32_t fmtChunkSize;   // expected to be 16 for PCM data
    uint16_t formatCode;     // 1 means PCM
    uint16_t numChannels;
    uint32_t sampleRate;
    uint32_t byteRate;       // sampleRate * blockAlign
    uint16_t blockAlign;     // numChannels * bitsPerSample/8
    uint16_t bitsPerSample;
    // Fields that only exist for non-PCM files (deliberately left out):
    //   uint16_t cbSize;
    //   uint16_t wValidBitsPerSample;
    //   uint32_t dwChannelMask;
    //   char subFormatGUID[16];

    // ---- data chunk ----
    uint32_t dataId;
    uint32_t dataChunkSize;  // size of the sample data in bytes
    uint16_t samples[];      // sample data begins immediately after the headers
};
#pragma warning(default : 4200)
bool win32LoadEntireFile(const char* filename, void** data, uint32_t* numBytesRead)
{
HANDLE file = CreateFileA(filename, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
if((file == INVALID_HANDLE_VALUE)) return false;
DWORD fileSize = GetFileSize(file, 0);
if(!fileSize) return false;
*data = HeapAlloc(GetProcessHeap(), 0, fileSize+1);
if(!*data) return false;
if(!ReadFile(file, *data, fileSize, (LPDWORD)numBytesRead, 0))
return false;
CloseHandle(file);
((uint8_t*)*data)[fileSize] = 0;
return true;
}
void Win32FreeFileData(void *data)
{
HeapFree(GetProcessHeap(), 0, data);
}
int main()
{
void* fileBytes;
uint32_t fileSize;
bool result = win32LoadEntireFile("filename.wav", &fileBytes, &fileSize);
assert(result);
WavFile* wav = (WavFile*)fileBytes;
// Check the Chunk IDs to make sure we loaded the file correctly
assert(wav->riffId == 1179011410);
assert(wav->waveId == 1163280727);
assert(wav->fmtId == 544501094);
assert(wav->dataId == 1635017060);
// Check data is in format we expect
assert(wav->formatCode == 1); // Only support PCM data
assert(wav->numChannels == 2); // Only support 2-channel data
assert(wav->fmtChunkSize == 16); // This should be true for PCM data
assert(wav->sampleRate == 44100); // Only support 44100Hz data
assert(wav->bitsPerSample == 16); // Only support 16-bit samples
// This is how these fields are defined, no harm to assert that they're what we expect
assert(wav->blockAlign == wav->numChannels * wav->bitsPerSample/8);
assert(wav->byteRate == wav->sampleRate * wav->blockAlign);
uint32_t numWavSamples = wav->dataChunkSize / sizeof(uint16_t);
uint16_t* wavSamples = wav->samples;
HRESULT hr = CoInitializeEx(nullptr, COINIT_SPEED_OVER_MEMORY);
assert(hr == S_OK);
IMMDeviceEnumerator* deviceEnumerator;
hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (LPVOID*)(&deviceEnumerator));
assert(hr == S_OK);
IMMDevice* audioDevice;
hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &audioDevice);
assert(hr == S_OK);
deviceEnumerator->Release();
IAudioClient2* audioClient;
hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, nullptr, (LPVOID*)(&audioClient));
assert(hr == S_OK);
audioDevice->Release();
// WAVEFORMATEX* defaultMixFormat = NULL;
// hr = audioClient->GetMixFormat(&defaultMixFormat);
// assert(hr == S_OK);
WAVEFORMATEX mixFormat = {};
mixFormat.wFormatTag = WAVE_FORMAT_PCM;
mixFormat.nChannels = 2;
mixFormat.nSamplesPerSec = 44100;//defaultMixFormat->nSamplesPerSec;
mixFormat.wBitsPerSample = 16;
mixFormat.nBlockAlign = (mixFormat.nChannels * mixFormat.wBitsPerSample) / 8;
mixFormat.nAvgBytesPerSec = mixFormat.nSamplesPerSec * mixFormat.nBlockAlign;
const float BUFFER_SIZE_IN_SECONDS = 2.0f;
const int64_t REFTIMES_PER_SEC = 10000000; // hundred nanoseconds
REFERENCE_TIME requestedSoundBufferDuration = (REFERENCE_TIME)(REFTIMES_PER_SEC * BUFFER_SIZE_IN_SECONDS);
DWORD initStreamFlags = ( AUDCLNT_STREAMFLAGS_RATEADJUST
| AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM
| AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY );
hr = audioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
initStreamFlags,
requestedSoundBufferDuration,
0, &mixFormat, nullptr);
assert(hr == S_OK);
IAudioRenderClient* audioRenderClient;
hr = audioClient->GetService(__uuidof(IAudioRenderClient), (LPVOID*)(&audioRenderClient));
assert(hr == S_OK);
UINT32 bufferSizeInFrames;
hr = audioClient->GetBufferSize(&bufferSizeInFrames);
assert(hr == S_OK);
hr = audioClient->Start();
assert(hr == S_OK);
int wavPlaybackSample = 0;
while (true)
{
// Padding is how much valid data is queued up in the sound buffer
// if there's enough padding then we could skip writing more data
UINT32 bufferPadding;
hr = audioClient->GetCurrentPadding(&bufferPadding);
assert(hr == S_OK);
// How much padding we want our sound buffer to have after writing to it.
// Needs to be enough so that the playback doesn't reach garbage data
// but we get less latency the lower it is (i.e. how long does it take
// between pressing jump and hearing the sound effect)
// Try setting this to e.g. 1/250.f to hear what happens when
// we're not writing enough data to stay ahead of playback!
const float TARGET_BUFFER_PADDING_IN_SECONDS = 1/60.f;
UINT32 targetBufferPadding = UINT32(bufferSizeInFrames * TARGET_BUFFER_PADDING_IN_SECONDS);
UINT32 numFramesToWrite = targetBufferPadding - bufferPadding;
int16_t* buffer;
hr = audioRenderClient->GetBuffer(numFramesToWrite, (BYTE**)(&buffer));
assert(hr == S_OK);
for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
{
*buffer++ = wavSamples[wavPlaybackSample++]; // left
*buffer++ = wavSamples[wavPlaybackSample++]; // right
wavPlaybackSample %= numWavSamples; // Loop if we reach end of wav file
}
hr = audioRenderClient->ReleaseBuffer(numFramesToWrite, 0);
assert(hr == S_OK);
// Get playback cursor position
// This is good for visualising playback and seeing the reading/writing in action!
IAudioClock* audioClock;
audioClient->GetService(__uuidof(IAudioClock), (LPVOID*)(&audioClock));
UINT64 audioPlaybackFreq;
UINT64 audioPlaybackPos;
audioClock->GetFrequency(&audioPlaybackFreq);
audioClock->GetPosition(&audioPlaybackPos, 0);
audioClock->Release();
// UINT64 audioPlaybackPosInSeconds = audioPlaybackPos/audioPlaybackFreq;
// UINT64 audioPlaybackPosInSamples = audioPlaybackPosInSeconds*mixFormat.nSamplesPerSec;
}
audioClient->Stop();
audioClient->Release();
audioRenderClient->Release();
Win32FreeFileData(fileBytes);
return 0;
}
@onigumo69
Copy link

Everything works fine but the sound is dramatically different than playing the wav file locally.
Any ideas?

@kevinmoran
Copy link
Author

I would usually expect the problem to be that the sound you're playing has a different sampling frequency or channel count to the WAVEFORMATEX of the device, but the assert()s should have caught that. Check:

  1. Are you definitely compiling without NDEBUG defined, i.e. are the assert()s working correctly?
  2. Does this only happen with one specific audio file? Try a few, and see if they all have the same number of channels, bits per sample and sampling frequency (aka nSamplesPerSec).

Otherwise, how exactly does the audio sound? Slowed down, sped up, etc? If it's a royalty-free clip you can send it to me and I'll try to check it out.

Apologies, this is an old, extremely barebones sample and I must get around to putting together a better resource.

@onigumo69
Copy link

  1. Yes it's in release mode and assert() is working correctly
  2. Sorry I didn't find any other wav files that can be played except this one:
    https://file-examples.com/index.php/sample-audio-files/sample-wav-download/
    I was testing the 10MB version but it sounds like the 1MB version through the program comparing to play it locally

It would be unspeakable appreciated if you can take a look,
and maybe like you said, provide a better solution for it,
since there are just none audio stuff out there, thanks

@kevinmoran
Copy link
Author

I just tried those files;
"file_example_WAV_1MG.wav" has a sampling frequency of 8000 Hz, so the assertion on line 91 fails when I try to play it
"file_example_WAV_2MG.wav" has a sampling frequency of 16000 Hz so it also crashes
"file_example_WAV_5MG.wav" and "file_example_WAV_10MG.wav" play correctly for me.

If you're compiling in release mode then the assertions get removed by the compiler so that's why you didn't get any crashes. I'm not sure why the 10MB version sound weird for you though. Try a debug build to see if any other assertions are failing?

@kevinmoran
Copy link
Author

Oh I totally forgot, I started working on some more complete sample code here:
https://github.com/kevinmoran/BeginnerWASAPI

The sample for pitch-shifting a wav file shows how to sample a wav at an arbitrary rate, which is also how you play back sounds with sampling frequencies that don't match the mix format's.

@onigumo69
Copy link

The old one ( code at this page ) does have sound quality issues.
This new one ("02. Playing a Wav File") works like a charm, the sound quality is exactly the same.
The difference is at the "write_sample" part.
old:

		for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
		{
			*buffer++ = wavSamples[wavPlaybackSample]; // left
			*buffer++ = wavSamples[wavPlaybackSample]; // right

			++wavPlaybackSample;
			wavPlaybackSample %= numWavSamples;
		}

new:

		for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
		{
			uint32_t leftSampleIndex = wav->numChannels * wavPlaybackSample;
			uint32_t rightSampleIndex = leftSampleIndex + wav->numChannels - 1;
			uint16_t leftSample = wav->samples[leftSampleIndex];
			uint16_t rightSample = wav->samples[rightSampleIndex];
			++wavPlaybackSample;
			*buffer++ = leftSample;
			*buffer++ = rightSample;

			if (wavPlaybackSample >= numWavSamples)
				wavPlaybackSample -= numWavSamples;
		}

Again, sorry to bother you with this kind of question but I'm really not a sound engineer or anything, and dealing with audio on Windows is just pure pain. And thanks for the quick reply, the new sample code and everything.
Currently I'm trying to do real time audio stuff on Windows:

  1. capturing the input audio data by microphones
  2. play the captured data in a specific playback device

Your play wav code has solved 2. step ( mainly is the write sample part which I don't understand, and I have implemented using the specific playback device myself) but I'm still struggling with the 1. step.

So the remaining issue is just how can I capture the microphone input audio data and save it as the above wavSamples format? ( or other format if that's more suitable, and the corresponding write sample method for that format if it exists )
Because right now I don't want to just play a local wav file, I want to play audio data directly being sent from other programs.

In short words is just I need both "read_sample" and "write_sample" methods.
Your play wav code is the "write_sample" but I'm still looking for the "read_sample"
Thanks again, you are a hero.

@rudolfninja
Copy link

@onigumo69 hello, have you solved your issue? I'm currently facing a similar problem and would like to see some code that does this.

@onigumo69
Copy link

@onigumo69 hello, have you solved your issue? I'm currently faced similar problem and would like to see some code that does the thing

I have not been working on that issue since back then but I do recommend this project and its owner's discord, he does these sort of things the best https://github.com/hikogui/hikogui

@nsfalex
Copy link

nsfalex commented Oct 11, 2024

I know it's been a while and I'm not sure if this is the issue @onigumo69 was referring to, but in this code sample the audio seems to play at half speed.
After some debugging I managed to figure out that this is due to the samples per second in the WAVEFORMATEX struct not accounting for stereo input data on line 126, and being set to half the sampling rate required.

After changing mixFormat.nSamplesPerSec = 44100 to mixFormat.nSamplesPerSec = 44100 * 2 it plays at full speed and normally, but you also have to change wavPlaybackSample %= numWavSamples to wavPlaybackSample %= numWavSamples * 2 on line 184 to read through the entire input buffer and not have the audio loop halfway through.

Hopefully this is of some help !

@kevinmoran
Copy link
Author

I know it's been a while and I'm not sure if this is he issue @onigumo69 was referring to, but in this code sample the audio seems to play at half speed. After some debugging I managed to figure out that this is due to the samples per second in the WAVEFORMATEX struct not accounting for stereo input data on line 126, and being set to half the sampling rate required.

After changing mixFormat.nSamplesPerSec = 44100 to mixFormat.nSamplesPerSec = 44100 * 2 it plays at full speed and normally, but you also have to change wavPlaybackSample %= numWavSamples to wavPlaybackSample %= numWavSamples * 2 on line 184 to read through the entire input buffer and not have the audio loop halfway through.

Hopefully this is of some help !

Thanks for looking into this, you were exactly right about there being a playback speed bug. I have a different fix to yours though - we shouldn't mess with the mix format we're giving Windows for rendering, so instead I'll change the line:
uint32_t numWavSamples = wav->dataChunkSize / (wav->numChannels * sizeof(uint16_t));
to:
uint32_t numWavSamples = wav->dataChunkSize / sizeof(uint16_t);

This is the actual bug: I was calculating the number of samples per channel but treating it as the overall number of samples later in the code. Revision coming soon, thanks again for your help, and to onigumo69 for reporting (apologies I didn't get around to fixing it till now).

@nsfalex
Copy link

nsfalex commented Oct 15, 2024

I know it's been a while and I'm not sure if this is he issue @onigumo69 was referring to, but in this code sample the audio seems to play at half speed. After some debugging I managed to figure out that this is due to the samples per second in the WAVEFORMATEX struct not accounting for stereo input data on line 126, and being set to half the sampling rate required.
After changing mixFormat.nSamplesPerSec = 44100 to mixFormat.nSamplesPerSec = 44100 * 2 it plays at full speed and normally, but you also have to change wavPlaybackSample %= numWavSamples to wavPlaybackSample %= numWavSamples * 2 on line 184 to read through the entire input buffer and not have the audio loop halfway through.
Hopefully this is of some help !

Thanks for looking into this, you were exactly right about there being a playback speed bug. I have a different fix to yours though - We shouldn't mess with the mix format we're giving Windows to for rendering, so instead I'll change the line: uint32_t numWavSamples = wav->dataChunkSize / (wav->numChannels * sizeof(uint16_t)); to: uint32_t numWavSamples = wav->dataChunkSize / sizeof(uint16_t);

This is the actual bug, I was calculating number of samples per channel but treating it as the overall number of samples later in the code. Revision coming soon, thanks again for your help, and to onigumo69 for reporting (apologies I didn't get fixing it till now).

I'm glad it helped! That's definitely a way more elegant solution, it seems like I really wasn't keeping the bigger picture in mind. I'll have to update this in the code I adapted, this solution is way more flexible when it comes to the amount of channels in the wave file.

@onigumo69
Copy link

good stuff guys

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment