hydren · March 4, 2025 20:20 · arminAnderson · Jul 22, 2022 · nhurde · Apr 25, 2023
diff --git a/sound_pitching_example.cpp b/sound_pitching_example.cpp
 /*
 * sound_pitching_example.cpp
 *
 *  Created on: 27 de dez de 2017
 *      Author: Carlos Faruolo
 */

 #include <SDL2/SDL.h>
 #include <SDL2/SDL_mixer.h>
 #include <iostream>
 #include <cstdlib>
 #include <cmath>

 /* global vars: audio device parameters shared by the helpers below (filled in by main via Mix_QuerySpec / Mix_AllocateChannels) */
 Uint16 audioFormat;                  // current audio format constant
 int audioFrequency;                  // frequency rate of the current audio format
 int audioChannelCount;               // number of channels of the current audio format
 int audioAllocatedMixChannelsCount;  // number of mix channels allocated

 // Size, in bytes, of a single sample of the given audio format: the low byte of the format value is taken as the sample size in bits.
 static inline Uint16 formatSampleSize(Uint16 format)
 {
 	const int bitsPerSample = format & 0xFF;
 	return bitsPerSample >> 3;  // bits -> bytes
 }

 // Get chunk time length (in ms) given its size and current audio format
 static int computeChunkLengthMillisec(int chunkSize)
 {
 	const Uint32 points = chunkSize / formatSampleSize(audioFormat);  // bytes / samplesize == sample points
 	const Uint32 frames = (points / audioChannelCount);  // sample points / channels == sample frames
 	return ((frames * 1000) / audioFrequency);  // (sample frames * 1000) / frequency == play length, in ms
 }

 // Custom handler object to control which part of the Mix_Chunk's audio data will be played on a channel, re-reading
 // the chunk at a scaled rate (with linear interpolation) so that it plays at a different speed and, therefore, pitch.
 // This needs to be a template because the actual Mix_Chunk's data format may vary (AUDIO_U8, AUDIO_S16, etc) and the data type varies with it (Uint8, Sint16, etc).
 // The AudioFormatType should be the data type that is compatible with the current SDL_mixer-initialized audio format.
 // Instances are allocated by registerEffect() and deleted by mixEffectDoneCallback().
 template<typename AudioFormatType>
 struct PlaybackSpeedEffectHandler
 {
 	const AudioFormatType* const chunkData;  // pointer to the chunk sample data (as array)
 	const float& speedFactor;  // the playback speed factor; held by reference so the owner can change it during playback (the referenced float must outlive this handler)
 	float position;  // current position of the sound, in ms
 	const int duration;  // the duration of the sound, in ms (truncated to whole milliseconds)
 	const int chunkSize;  // the size of the sound, as a number of indexes (or sample points). think of this as an array size when using the proper array type (instead of just Uint8*).
 	const bool loop;  // flags whether playback should stay looping
 	const bool attemptSelfHalting;  // flags whether playback should be halted by this callback when playback is finished
 	bool altered;  // true if this playback has been pitched by this handler

 	// chunk: the chunk being played; its sample buffer is referenced, not copied.
 	// speed: speed factor, referenced (see 'speedFactor').
 	PlaybackSpeedEffectHandler(const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
 	: chunkData(reinterpret_cast<AudioFormatType*>(chunk.abuf)), speedFactor(speed),
 	  position(0), duration(computeChunkLengthMillisec(chunk.alen)),
 	  chunkSize(chunk.alen / formatSampleSize(audioFormat)),
 	  loop(loop), attemptSelfHalting(trySelfHalt), altered(false)
 	{}

 	// Sample value that represents silence for AudioFormatType: 0 for signed and floating point samples,
 	// the midpoint of the range for unsigned samples (0x80 for Uint8, 0x8000 for Uint16).
 	static AudioFormatType silenceValue()
 	{
 		const AudioFormatType minusOne = static_cast<AudioFormatType>(-1);  // wraps to the maximum value only on unsigned types
 		return minusOne > 0 ? static_cast<AudioFormatType>(minusOne / 2 + 1) : static_cast<AudioFormatType>(0);
 	}

 	// processing function to be able to change chunk speed/pitch.
 	// mixChannel: channel the effect is running on; stream/length: the buffer to fill and its size in bytes.
 	void modifyStreamPlaybackSpeed(int mixChannel, void* stream, int length)
 	{
 		AudioFormatType* buffer = static_cast<AudioFormatType*>(stream);
 		const int bufferSize = length / sizeof(AudioFormatType);  // buffer size (as array)

 		// take a "snapshot" of speed factor, since the referenced value may change while this function runs.
 		// negative (and NaN) factors are treated as 0: they would make the read index negative and read before the chunk data.
 		const float requestedSpeed = this->speedFactor;
 		const float speedFactor = requestedSpeed > 0.0f ? requestedSpeed : 0.0f;

 		// if there is still sound to be played
 		if(position < duration || loop)
 		{
 			const float delta = 1000.0 / audioFrequency,  // normal duration of each sample
 				    vdelta = delta * speedFactor;  // virtual stretched duration, scaled by 'speedFactor'

 			// if playback is unaltered and pitch is required (for the first time)
 			if(!altered && speedFactor != 1.0f)
 				altered = true;  // flags playback modification and proceed to the pitch routine.

 			// if unaltered, this pitch routine is skipped and the buffer is left as received.
 			// an empty chunk is skipped as well: there is nothing to read and '% chunkSize' would divide by zero.
 			if(altered && chunkSize > 0)
 			{
 				for(int i = 0; i < bufferSize; i += audioChannelCount)
 				{
 					const int j = i / audioChannelCount;  // j goes from 0 to size/channelCount, incremented 1 by 1
 					const float x = position + j * vdelta;  // get "virtual" index. its corresponding value will be interpolated.
 					const float framePosition = x / delta;  // same index, measured in frames of the original chunk
 					const int k = floor(framePosition);  // left frame to interpolate from (right frame is this plus 1)
 					const float prop = framePosition - k;  // proportion of the right value (left will be 1.0 minus this)

 					const int leftBase = k * audioChannelCount;  // index of the first sample of the left frame
 					const int rightBase = leftBase + audioChannelCount;  // index of the first sample of the right frame
 					const bool leftInBounds = leftBase + audioChannelCount - 1 < chunkSize;
 					const bool rightInBounds = rightBase + audioChannelCount - 1 < chunkSize;

 					// usually just 2 channels: 0 (left) and 1 (right), but who knows...
 					for(int c = 0; c < audioChannelCount; c++)
 					{
 						if(leftInBounds || loop)
 						{
 							const AudioFormatType v0 = chunkData[(leftBase + c) % chunkSize];
 							// when not looping, the last frame has no right neighbour: hold its value instead of
 							// wrapping around and blending it with the first frame of the chunk.
 							const AudioFormatType v1 = (rightInBounds || loop) ? chunkData[(rightBase + c) % chunkSize] : v0;

 							// linear interpolation (single multiplication), done in double so that 'right - left'
 							// cannot overflow for 32-bit integer samples and the result stays between both values.
 							const double left = v0, right = v1;
 							buffer[i + c] = static_cast<AudioFormatType>(left + prop * (right - left));
 						}
 						else  // if k is out of bounds (chunk bounds), it means we already finished; thus, we'll pass silence
 						{
 							buffer[i + c] = silenceValue();
 						}
 					}
 				}
 			}

 			// update position
 			position += (bufferSize / audioChannelCount) * vdelta;

 			// wrap position if looping. the wrap length comes from the frame count instead of 'duration': 'duration' is
 			// truncated to whole milliseconds, so wrapping by it shifted the read index by up to 1 ms of audio on every
 			// wrap, and a 'duration' of 0 (chunk shorter than 1 ms) made the former subtraction loop spin forever.
 			if(loop)
 			{
 				const float loopLength = (chunkSize / audioChannelCount) * delta;  // exact chunk length, in ms
 				if(loopLength > 0 && position > loopLength)
 					position = std::fmod(position, loopLength);
 			}
 		}
 		else  // if we already played the whole sound but finished earlier than expected by SDL_mixer (due to faster playback speed)
 		{
 			// set silence on the buffer since Mix_HaltChannel() lets some of it be played for a few ms.
 			const AudioFormatType silence = silenceValue();
 			for(int i = 0; i < bufferSize; i++)
 				buffer[i] = silence;

 			if(attemptSelfHalting)
 				Mix_HaltChannel(mixChannel);  // XXX unsafe call, since it locks audio; but no safer solution was found yet...
 		}
 	}

 	// Mix_EffectFunc_t callback that redirects to handler method (handler passed via userData)
 	static void mixEffectFuncCallback(int channel, void* stream, int length, void* userData)
 	{
 		static_cast<PlaybackSpeedEffectHandler*>(userData)->modifyStreamPlaybackSpeed(channel, stream, length);
 	}

 	// Mix_EffectDone_t callback that deletes the handler at the end of the effect usage  (handler passed via userData)
 	static void mixEffectDoneCallback(int, void *userData)
 	{
 		delete static_cast<PlaybackSpeedEffectHandler*>(userData);
 	}

 	// function to register a handler to this channel for the next playback.
 	// the handler is owned by SDL_mixer once registered (it is deleted by mixEffectDoneCallback).
 	static void registerEffect(int channel, const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
 	{
 		PlaybackSpeedEffectHandler* const handler = new PlaybackSpeedEffectHandler(chunk, speed, loop, trySelfHalt);

 		// Mix_RegisterEffect returns zero on error (e.g. invalid channel); the done callback is then never invoked,
 		// so the handler has to be released here or it would leak.
 		if(Mix_RegisterEffect(channel, mixEffectFuncCallback, mixEffectDoneCallback, handler) == 0)
 			delete handler;
 	}
 };

 // Register playback speed effect handler according to the current audio format; effect valid for a single playback; if playback is looped, lasts until it's halted
 void setupPlaybackSpeedEffect(const Mix_Chunk* const chunk, const float& speed, int channel, bool loop=false, bool trySelfHalt=false)
 {
 	// select the register function for the current audio format and register the effect using the compatible handlers
 	// XXX is it correct to behave the same way to all S16 and U16 formats? Should we create case statements for AUDIO_S16SYS, AUDIO_S16LSB, AUDIO_S16MSB, etc, individually?
 	switch(audioFormat)
 	{
 		case AUDIO_U8:  PlaybackSpeedEffectHandler<Uint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 		case AUDIO_S8:  PlaybackSpeedEffectHandler<Sint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 		case AUDIO_U16: PlaybackSpeedEffectHandler<Uint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 		default:
 		case AUDIO_S16: PlaybackSpeedEffectHandler<Sint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 		case AUDIO_S32: PlaybackSpeedEffectHandler<Sint32>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 		case AUDIO_F32: PlaybackSpeedEffectHandler<float >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
 	}
 }

 #define EXAMPLE_DURATION 8000

 // example
 // run the executable passing an filename of a sound file that SDL_mixer is able to open (ogg, wav, ...)
 int main(int argc, char** argv)
 {
 	if(argc < 2) { std::cout << "Missing argument." << std::endl; return 0; }

 	SDL_Init(SDL_INIT_AUDIO);
 	Mix_OpenAudio(MIX_DEFAULT_FREQUENCY, MIX_DEFAULT_FORMAT, MIX_DEFAULT_CHANNELS, 4096);
 	Mix_QuerySpec(&audioFrequency, &audioFormat, &audioChannelCount);  // query specs
 	audioAllocatedMixChannelsCount = Mix_AllocateChannels(MIX_CHANNELS);

 	Mix_Chunk* chunk = Mix_LoadWAV(argv[1]);
 	const float initialSpeed = argc > 2? atof(argv[2]) : 1.0;
 	const bool modulate = argc > 3? atoi(argv[3]) : false;

 	float speed = initialSpeed;
 	if(chunk != NULL)
 	{
 		const int channel = Mix_PlayChannelTimed(-1, chunk, -1, EXAMPLE_DURATION);
 		setupPlaybackSpeedEffect(chunk, speed, channel, true);
 		std::cout << "Looping for 8 seconds..." << std::endl;

 		// while looping, change pitch dynamically
 		if(modulate) while(SDL_GetTicks() < EXAMPLE_DURATION)
 			speed = initialSpeed + 0.25*sin(0.001*SDL_GetTicks());
 		else
 			SDL_Delay(EXAMPLE_DURATION);

 		std::cout << "Finished." << std::endl;
 	}
 	else
 		std::cout << "No data." << std::endl;

 	Mix_FreeChunk(chunk);
 	Mix_CloseAudio();
 	Mix_Quit();
 	SDL_Quit();
 	return EXIT_SUCCESS;
 }
	/*
	* sound_pitching_example.cpp
	*
	* Created on: 27 de dez de 2017
	* Author: Carlos Faruolo
	*/

	#include <SDL2/SDL.h>
	#include <SDL2/SDL_mixer.h>
	#include <iostream>
	#include <cstdlib>
	#include <cmath>

	/* global vars: audio device parameters shared by the helpers below (filled in by main via Mix_QuerySpec / Mix_AllocateChannels) */
	Uint16 audioFormat;                  // current audio format constant
	int audioFrequency;                  // frequency rate of the current audio format
	int audioChannelCount;               // number of channels of the current audio format
	int audioAllocatedMixChannelsCount;  // number of mix channels allocated

	// Size, in bytes, of a single sample of the given audio format: the low byte of the format value is taken as the sample size in bits.
	static inline Uint16 formatSampleSize(Uint16 format)
	{
		const int bitsPerSample = format & 0xFF;
		return bitsPerSample >> 3;  // bits -> bytes
	}

	// Get chunk time length (in ms) given its size and current audio format
	static int computeChunkLengthMillisec(int chunkSize)
	{
	const Uint32 points = chunkSize / formatSampleSize(audioFormat); // bytes / samplesize == sample points
	const Uint32 frames = (points / audioChannelCount); // sample points / channels == sample frames
	return ((frames * 1000) / audioFrequency); // (sample frames * 1000) / frequency == play length, in ms
	}

	// Custom handler object to control which part of the Mix_Chunk's audio data will be played on a channel, re-reading
	// the chunk at a scaled rate (with linear interpolation) so that it plays at a different speed and, therefore, pitch.
	// This needs to be a template because the actual Mix_Chunk's data format may vary (AUDIO_U8, AUDIO_S16, etc) and the data type varies with it (Uint8, Sint16, etc).
	// The AudioFormatType should be the data type that is compatible with the current SDL_mixer-initialized audio format.
	// Instances are allocated by registerEffect() and deleted by mixEffectDoneCallback().
	template<typename AudioFormatType>
	struct PlaybackSpeedEffectHandler
	{
		const AudioFormatType* const chunkData;  // pointer to the chunk sample data (as array)
		const float& speedFactor;  // the playback speed factor; held by reference so the owner can change it during playback (the referenced float must outlive this handler)
		float position;  // current position of the sound, in ms
		const int duration;  // the duration of the sound, in ms (truncated to whole milliseconds)
		const int chunkSize;  // the size of the sound, as a number of indexes (or sample points). think of this as an array size when using the proper array type (instead of just Uint8*).
		const bool loop;  // flags whether playback should stay looping
		const bool attemptSelfHalting;  // flags whether playback should be halted by this callback when playback is finished
		bool altered;  // true if this playback has been pitched by this handler

		// chunk: the chunk being played; its sample buffer is referenced, not copied.
		// speed: speed factor, referenced (see 'speedFactor').
		PlaybackSpeedEffectHandler(const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
		: chunkData(reinterpret_cast<AudioFormatType*>(chunk.abuf)), speedFactor(speed),
		  position(0), duration(computeChunkLengthMillisec(chunk.alen)),
		  chunkSize(chunk.alen / formatSampleSize(audioFormat)),
		  loop(loop), attemptSelfHalting(trySelfHalt), altered(false)
		{}

		// Sample value that represents silence for AudioFormatType: 0 for signed and floating point samples,
		// the midpoint of the range for unsigned samples (0x80 for Uint8, 0x8000 for Uint16).
		static AudioFormatType silenceValue()
		{
			const AudioFormatType minusOne = static_cast<AudioFormatType>(-1);  // wraps to the maximum value only on unsigned types
			return minusOne > 0 ? static_cast<AudioFormatType>(minusOne / 2 + 1) : static_cast<AudioFormatType>(0);
		}

		// processing function to be able to change chunk speed/pitch.
		// mixChannel: channel the effect is running on; stream/length: the buffer to fill and its size in bytes.
		void modifyStreamPlaybackSpeed(int mixChannel, void* stream, int length)
		{
			AudioFormatType* buffer = static_cast<AudioFormatType*>(stream);
			const int bufferSize = length / sizeof(AudioFormatType);  // buffer size (as array)

			// take a "snapshot" of speed factor, since the referenced value may change while this function runs.
			// negative (and NaN) factors are treated as 0: they would make the read index negative and read before the chunk data.
			const float requestedSpeed = this->speedFactor;
			const float speedFactor = requestedSpeed > 0.0f ? requestedSpeed : 0.0f;

			// if there is still sound to be played
			if(position < duration || loop)
			{
				const float delta = 1000.0 / audioFrequency,  // normal duration of each sample
					    vdelta = delta * speedFactor;  // virtual stretched duration, scaled by 'speedFactor'

				// if playback is unaltered and pitch is required (for the first time)
				if(!altered && speedFactor != 1.0f)
					altered = true;  // flags playback modification and proceed to the pitch routine.

				// if unaltered, this pitch routine is skipped and the buffer is left as received.
				// an empty chunk is skipped as well: there is nothing to read and '% chunkSize' would divide by zero.
				if(altered && chunkSize > 0)
				{
					for(int i = 0; i < bufferSize; i += audioChannelCount)
					{
						const int j = i / audioChannelCount;  // j goes from 0 to size/channelCount, incremented 1 by 1
						const float x = position + j * vdelta;  // get "virtual" index. its corresponding value will be interpolated.
						const float framePosition = x / delta;  // same index, measured in frames of the original chunk
						const int k = floor(framePosition);  // left frame to interpolate from (right frame is this plus 1)
						const float prop = framePosition - k;  // proportion of the right value (left will be 1.0 minus this)

						const int leftBase = k * audioChannelCount;  // index of the first sample of the left frame
						const int rightBase = leftBase + audioChannelCount;  // index of the first sample of the right frame
						const bool leftInBounds = leftBase + audioChannelCount - 1 < chunkSize;
						const bool rightInBounds = rightBase + audioChannelCount - 1 < chunkSize;

						// usually just 2 channels: 0 (left) and 1 (right), but who knows...
						for(int c = 0; c < audioChannelCount; c++)
						{
							if(leftInBounds || loop)
							{
								const AudioFormatType v0 = chunkData[(leftBase + c) % chunkSize];
								// when not looping, the last frame has no right neighbour: hold its value instead of
								// wrapping around and blending it with the first frame of the chunk.
								const AudioFormatType v1 = (rightInBounds || loop) ? chunkData[(rightBase + c) % chunkSize] : v0;

								// linear interpolation (single multiplication), done in double so that 'right - left'
								// cannot overflow for 32-bit integer samples and the result stays between both values.
								const double left = v0, right = v1;
								buffer[i + c] = static_cast<AudioFormatType>(left + prop * (right - left));
							}
							else  // if k is out of bounds (chunk bounds), it means we already finished; thus, we'll pass silence
							{
								buffer[i + c] = silenceValue();
							}
						}
					}
				}

				// update position
				position += (bufferSize / audioChannelCount) * vdelta;

				// wrap position if looping. the wrap length comes from the frame count instead of 'duration': 'duration' is
				// truncated to whole milliseconds, so wrapping by it shifted the read index by up to 1 ms of audio on every
				// wrap, and a 'duration' of 0 (chunk shorter than 1 ms) made the former subtraction loop spin forever.
				if(loop)
				{
					const float loopLength = (chunkSize / audioChannelCount) * delta;  // exact chunk length, in ms
					if(loopLength > 0 && position > loopLength)
						position = std::fmod(position, loopLength);
				}
			}
			else  // if we already played the whole sound but finished earlier than expected by SDL_mixer (due to faster playback speed)
			{
				// set silence on the buffer since Mix_HaltChannel() lets some of it be played for a few ms.
				const AudioFormatType silence = silenceValue();
				for(int i = 0; i < bufferSize; i++)
					buffer[i] = silence;

				if(attemptSelfHalting)
					Mix_HaltChannel(mixChannel);  // XXX unsafe call, since it locks audio; but no safer solution was found yet...
			}
		}

		// Mix_EffectFunc_t callback that redirects to handler method (handler passed via userData)
		static void mixEffectFuncCallback(int channel, void* stream, int length, void* userData)
		{
			static_cast<PlaybackSpeedEffectHandler*>(userData)->modifyStreamPlaybackSpeed(channel, stream, length);
		}

		// Mix_EffectDone_t callback that deletes the handler at the end of the effect usage (handler passed via userData)
		static void mixEffectDoneCallback(int, void *userData)
		{
			delete static_cast<PlaybackSpeedEffectHandler*>(userData);
		}

		// function to register a handler to this channel for the next playback.
		// the handler is owned by SDL_mixer once registered (it is deleted by mixEffectDoneCallback).
		static void registerEffect(int channel, const Mix_Chunk& chunk, const float& speed, bool loop, bool trySelfHalt)
		{
			PlaybackSpeedEffectHandler* const handler = new PlaybackSpeedEffectHandler(chunk, speed, loop, trySelfHalt);

			// Mix_RegisterEffect returns zero on error (e.g. invalid channel); the done callback is then never invoked,
			// so the handler has to be released here or it would leak.
			if(Mix_RegisterEffect(channel, mixEffectFuncCallback, mixEffectDoneCallback, handler) == 0)
				delete handler;
		}
	};

	// Register playback speed effect handler according to the current audio format; effect valid for a single playback; if playback is looped, lasts until it's halted
	void setupPlaybackSpeedEffect(const Mix_Chunk* const chunk, const float& speed, int channel, bool loop=false, bool trySelfHalt=false)
	{
	// select the register function for the current audio format and register the effect using the compatible handlers
	// XXX is it correct to behave the same way to all S16 and U16 formats? Should we create case statements for AUDIO_S16SYS, AUDIO_S16LSB, AUDIO_S16MSB, etc, individually?
	switch(audioFormat)
	{
	case AUDIO_U8: PlaybackSpeedEffectHandler<Uint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	case AUDIO_S8: PlaybackSpeedEffectHandler<Sint8 >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	case AUDIO_U16: PlaybackSpeedEffectHandler<Uint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	default:
	case AUDIO_S16: PlaybackSpeedEffectHandler<Sint16>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	case AUDIO_S32: PlaybackSpeedEffectHandler<Sint32>::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	case AUDIO_F32: PlaybackSpeedEffectHandler<float >::registerEffect(channel, *chunk, speed, loop, trySelfHalt); break;
	}
	}

	#define EXAMPLE_DURATION 8000

	// example
	// run the executable passing an filename of a sound file that SDL_mixer is able to open (ogg, wav, ...)
	int main(int argc, char** argv)
	{
	if(argc < 2) { std::cout << "Missing argument." << std::endl; return 0; }

	SDL_Init(SDL_INIT_AUDIO);
	Mix_OpenAudio(MIX_DEFAULT_FREQUENCY, MIX_DEFAULT_FORMAT, MIX_DEFAULT_CHANNELS, 4096);
	Mix_QuerySpec(&audioFrequency, &audioFormat, &audioChannelCount); // query specs
	audioAllocatedMixChannelsCount = Mix_AllocateChannels(MIX_CHANNELS);

	Mix_Chunk* chunk = Mix_LoadWAV(argv[1]);
	const float initialSpeed = argc > 2? atof(argv[2]) : 1.0;
	const bool modulate = argc > 3? atoi(argv[3]) : false;

	float speed = initialSpeed;
	if(chunk != NULL)
	{
	const int channel = Mix_PlayChannelTimed(-1, chunk, -1, EXAMPLE_DURATION);
	setupPlaybackSpeedEffect(chunk, speed, channel, true);
	std::cout << "Looping for 8 seconds..." << std::endl;

	// while looping, change pitch dynamically
	if(modulate) while(SDL_GetTicks() < EXAMPLE_DURATION)
	speed = initialSpeed + 0.25sin(0.001SDL_GetTicks());
	else
	SDL_Delay(EXAMPLE_DURATION);

	std::cout << "Finished." << std::endl;
	}
	else
	std::cout << "No data." << std::endl;

	Mix_FreeChunk(chunk);
	Mix_CloseAudio();
	Mix_Quit();
	SDL_Quit();
	return EXIT_SUCCESS;
	}