leduyquang753 · December 6, 2024 11:31
diff --git a/ColoredAudioWaveformGenerator.cpp b/ColoredAudioWaveformGenerator.cpp
 /*
 	Colored audio waveform generator.
 	The audio is split into 3 bands and then their intensity is assigned to the color channels as follows:
 		– Under 200 Hz: red.
 		– From 200 Hz to 3000 Hz: green.
 		– Over 3000 Hz: blue.
 	The audio file's path is passed as a command-line argument.

 	Dependencies:
 		– dr_libs: https://github.com/mackron/dr_libs
 		– LodePNG: https://github.com/lvandeve/lodepng
 */

 #include <algorithm>
 #include <array>
 #include <cmath>
 #include <codecvt>
 #include <cstdlib>
 #include <filesystem>
 #include <fstream>
 #include <ios>
 #include <iostream>
 #include <limits>
 #include <locale>
 #include <string>
 #include <vector>

 #define DR_FLAC_IMPLEMENTATION
 #define DR_MP3_IMPLEMENTATION
 #define DR_WAV_IMPLEMENTATION
 #include "dr_flac.h"
 #include "dr_mp3.h"
 #include "dr_wav.h"
 #include "lodepng.h"

 // The constant π in single precision; used to turn a frequency into an angular frequency.
 constexpr float pi = 3.14159265358979f;

 // Number of image columns produced for each second of audio.
 constexpr int pixelsPerSecond = 53;
 // Height, in pixels, of the area the waveform itself is drawn into.
 constexpr int waveformHeight = 69;
 // Number of blank pixel rows left above and below the waveform area.
 constexpr int padding = 16;
 // Gain applied to a pixel's relative coverage before it is capped at full brightness.
 constexpr float brightnessBoost = 1.25f;

 /*
 	Second-order recursive filter section.
 	The coefficients are stored already divided by a0. The filter memory is kept between apply() calls,
 	so a signal can be fed in consecutive chunks. The result of the latest apply() call is held in `output`.
 */
 struct Filter {
 	float a1, a2, b0, b1, b2;

 	// Filter memory, oldest first: the two previous inputs followed by the two previous outputs.
 	std::array<float, 4> state = {};
 	// Filtered samples produced by the most recent apply() call.
 	std::vector<float> output;

 	// Takes the six raw coefficients and normalizes the other five by a0.
 	Filter(
 		const float a0, const float a1, const float a2, const float b0, const float b1, const float b2
 	): a1(a1/a0), a2(a2/a0), b0(b0/a0), b1(b1/a0), b2(b2/a0) {}

 	// Builds a low-pass section with the given cutoff frequency (same unit as sampleRate).
 	static Filter lowPass(const float sampleRate, const float frequency) {
 		const float omega = 2.f * pi * frequency / sampleRate;
 		const float cosOmega = std::cos(omega);
 		const float alpha = std::sin(omega) / std::sqrt(2.f);
 		const float outerB = (1.f - cosOmega) / 2.f;
 		return Filter(
 			1.f + alpha, -2.f * cosOmega, 1.f - alpha,
 			outerB, 1.f - cosOmega, outerB
 		);
 	}

 	// Builds a high-pass section with the given cutoff frequency (same unit as sampleRate).
 	static Filter highPass(const float sampleRate, const float frequency) {
 		const float omega = 2.f * pi * frequency / sampleRate;
 		const float cosOmega = std::cos(omega);
 		const float alpha = std::sin(omega) / std::sqrt(2.f);
 		const float outerB = (1.f + cosOmega) / 2.f;
 		return Filter(
 			1.f + alpha, -2.f * cosOmega, 1.f - alpha,
 			outerB, -1.f - cosOmega, outerB
 		);
 	}

 	// Filters the samples in [begin, end) into `output`, continuing from the memory left by the previous call.
 	void apply(const float *const begin, const float *const end) {
 		const std::size_t count = end - begin;
 		output.resize(count);
 		for (std::size_t i = 0; i != count; ++i) {
 			const float input = begin[i];
 			const float result
 				= b2*state[0] + b1*state[1] + b0*input
 				- a2*state[2] - a1*state[3];
 			// Shift the history: drop the oldest input and output, append the newest ones.
 			state = {state[1], input, state[3], result};
 			output[i] = result;
 		}
 	}
 };

 /*
 	Computes the root mean square of the samples in [begin, end).
 	An empty range yields 0 rather than the NaN that dividing 0 by 0 would produce.
 */
 float rms(const float *const begin, const float *const end) {
 	if (begin == end) return 0.f;
 	float sum = 0;
 	for (const float *sample = begin; sample != end; ++sample) {
 		const float value = *sample;
 		sum += value * value;
 	}
 	return std::sqrt(sum / static_cast<float>(end - begin));
 }

 int main(int argc, char **argv) {
 	if (argc < 2) {
 		std::cerr << "Usage: " << argv[0] << " <Input audio file> [Output image file]\n";
 		return 1;
 	}
 	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
 	const auto path = std::filesystem::absolute(std::filesystem::path(converter.from_bytes(argv[1])));
 	if (!std::filesystem::exists(path)) {
 		std::cerr << "The specified file does not exist.\n";
 		return 2;
 	}
 	std::vector<char> fileData(std::filesystem::file_size(path));
 	{
 		std::ifstream file(path, std::ios::binary);
 		file.read(fileData.data(), fileData.size());
 	}
 	std::vector<float> samples;
 	unsigned channelCount, sampleRate;
 	drflac_uint64 frameCount;
 	{
 		float *rawSamples;
 		if (path.extension() == ".flac") {
 			rawSamples = drflac_open_memory_and_read_pcm_frames_f32(
 				fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
 			);
 		} else if (path.extension() == ".mp3") {
 			drmp3_config config;
 			rawSamples = drmp3_open_memory_and_read_pcm_frames_f32(
 				fileData.data(), fileData.size(), &config, &frameCount, nullptr
 			);
 			channelCount = config.channels;
 			sampleRate = config.sampleRate;
 		} else if (path.extension() == ".wav") {
 			rawSamples = drwav_open_memory_and_read_pcm_frames_f32(
 				fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
 			);
 		} else {
 			std::cerr << "Supported file formats are FLAC, MP3 and WAV.\n";
 			return 3;
 		}
 		samples.resize(frameCount);
 		for (drflac_uint64 i = 0; i != frameCount; ++i) samples[i] = rawSamples[i * channelCount];
 		drflac_free(rawSamples, nullptr);
 	}

 	Filter
 		filterL = Filter::lowPass(sampleRate, 200),
 		filterML = Filter::highPass(sampleRate, 200),
 		filterMH = Filter::lowPass(sampleRate, 3000),
 		filterH = Filter::highPass(sampleRate, 3000);
 	const int samplesPerPixel = sampleRate / pixelsPerSecond;
 	const int
 		width = (frameCount + samplesPerPixel - 1) / samplesPerPixel,
 		height = waveformHeight + padding*2;
 	std::vector<unsigned char> image(width * height * 3);
 	std::vector<int> columnData(waveformHeight);
 	int x = 0, previousPosition = waveformHeight / 2;
 	for (int index = 0; index <= frameCount; ++index) {
 		const int newX = index / samplesPerPixel;
 		if (newX != x || index == frameCount) {
 			const float *const start = samples.data() + x * samplesPerPixel;
 			const float *const end = samples.data() + std::min(static_cast<int>(frameCount), (x+1) * samplesPerPixel);
 			filterL.apply(start, end);
 			filterML.apply(start, end);
 			filterMH.apply(&*filterML.output.begin(), &*filterML.output.end());
 			filterH.apply(start, end);
 			float
 				loudnessL = rms(&*filterL.output.begin(), &*filterL.output.end()),
 				loudnessM = rms(&*filterMH.output.begin(), &*filterMH.output.end()),
 				loudnessH = rms(&*filterH.output.begin(), &*filterH.output.end());
 			const float maxLoudness = std::max({loudnessL, loudnessM, loudnessH});
 			if (maxLoudness == 0.f) {
 				loudnessL = 1.f;
 				loudnessM = 1.f;
 				loudnessH = 1.f;
 			} else {
 				loudnessL /= maxLoudness;
 				loudnessM /= maxLoudness;
 				loudnessH /= maxLoudness;
 			}
 			const float
 				loudness = rms(start, end),
 				baseMultiplier = 255.f * std::min(1.f, loudness * waveformHeight),
 				rgInfluence = std::max(0.f, loudnessH - 1.f/3.f) / (2.f/3.f),
 				baseR = std::max(0.f, loudnessL * (1.164f - 0.164f*rgInfluence) * (1.f - loudnessM*0.28f)),
 				baseG = std::max(0.f, loudnessM * (1.087f - 0.087f*rgInfluence) * (1.f - loudnessL*0.33f)),
 				baseB = std::max(0.f, loudnessH * (3.6f - loudnessL*1.2f - loudnessM*0.8f)),
 				brightest = *std::max_element(columnData.begin(), columnData.end());
 			for (int y = 0; y != waveformHeight; ++y) {
 				const float multiplier = std::min(1.f, columnData[y] / brightest * brightnessBoost);
 				unsigned char *const pixel = image.data() + ((waveformHeight - 1 - y + padding) * width + x) * 3;
 				pixel[0] = baseMultiplier * std::min(1.f, multiplier * baseR);
 				pixel[1] = baseMultiplier * std::min(1.f, multiplier * baseG);
 				pixel[2] = baseMultiplier * std::min(1.f, multiplier * baseB);
 			}
 			std::fill(columnData.begin(), columnData.end(), 0);
 			x = newX;
 		}
 		if (index == frameCount) break;
 		const int
 			position = (samples[index] + 1.f) / 2.f * waveformHeight,
 			start = std::min(previousPosition, position),
 			end = std::max(previousPosition, position);
 		for (int y = start; y <= end; ++y) columnData[y] += 1;
 		previousPosition = position;
 	}
 	std::vector<unsigned char> pngData;
 	lodepng::encode(pngData, image, width, height, LCT_RGB);
 	std::ofstream imageFile(
 		argc > 2 ? std::filesystem::path(converter.from_bytes(argv[2])) : path.stem().concat(".png"),
 		std::ios::binary
 	);
 	imageFile.write(reinterpret_cast<char*>(pngData.data()), pngData.size());
 }
	/*
	Colored audio waveform generator.
	The audio is split into 3 bands and then their intensity is assigned to the color channels as follows:
	– Under 200 Hz: red.
	– From 200 Hz to 3000 Hz: green.
	– Over 3000 Hz: blue.
	The audio file's path is passed as a command-line argument.

	Dependencies:
	– dr_libs: https://github.com/mackron/dr_libs
	– LodePNG: https://github.com/lvandeve/lodepng
	*/

	#include <algorithm>
	#include <array>
	#include <cmath>
	#include <codecvt>
	#include <cstdlib>
	#include <filesystem>
	#include <fstream>
	#include <ios>
	#include <iostream>
	#include <limits>
	#include <locale>
	#include <string>
	#include <vector>

	#define DR_FLAC_IMPLEMENTATION
	#define DR_MP3_IMPLEMENTATION
	#define DR_WAV_IMPLEMENTATION
	#include "dr_flac.h"
	#include "dr_mp3.h"
	#include "dr_wav.h"
	#include "lodepng.h"

	// The constant π in single precision; used to turn a frequency into an angular frequency.
	constexpr float pi = 3.14159265358979f;

	// Number of image columns produced for each second of audio.
	constexpr int pixelsPerSecond = 53;
	// Height, in pixels, of the area the waveform itself is drawn into.
	constexpr int waveformHeight = 69;
	// Number of blank pixel rows left above and below the waveform area.
	constexpr int padding = 16;
	// Gain applied to a pixel's relative coverage before it is capped at full brightness.
	constexpr float brightnessBoost = 1.25f;

	/*
		Second-order recursive filter section.
		The coefficients are stored already divided by a0. The filter memory is kept between apply() calls,
		so a signal can be fed in consecutive chunks. The result of the latest apply() call is held in `output`.
	*/
	struct Filter {
		float a1, a2, b0, b1, b2;

		// Filter memory, oldest first: the two previous inputs followed by the two previous outputs.
		std::array<float, 4> state = {};
		// Filtered samples produced by the most recent apply() call.
		std::vector<float> output;

		// Takes the six raw coefficients and normalizes the other five by a0.
		Filter(
			const float a0, const float a1, const float a2, const float b0, const float b1, const float b2
		): a1(a1/a0), a2(a2/a0), b0(b0/a0), b1(b1/a0), b2(b2/a0) {}

		// Builds a low-pass section with the given cutoff frequency (same unit as sampleRate).
		static Filter lowPass(const float sampleRate, const float frequency) {
			const float
				w0 = 2.f * pi * frequency / sampleRate,
				cosw0 = std::cos(w0),
				alpha = std::sin(w0) / std::sqrt(2.f);
			return {
				1.f + alpha, -2.f * cosw0, 1.f - alpha,
				(1.f - cosw0) / 2.f, 1.f - cosw0, (1.f - cosw0) / 2.f
			};
		}

		// Builds a high-pass section with the given cutoff frequency (same unit as sampleRate).
		static Filter highPass(const float sampleRate, const float frequency) {
			const float
				w0 = 2.f * pi * frequency / sampleRate,
				cosw0 = std::cos(w0),
				alpha = std::sin(w0) / std::sqrt(2.f);
			return {
				1.f + alpha, -2.f * cosw0, 1.f - alpha,
				(1.f + cosw0) / 2.f, -1.f - cosw0, (1.f + cosw0) / 2.f
			};
		}

		// Filters the samples in [begin, end) into `output`, continuing from the memory left by the previous call.
		void apply(const float *const begin, const float *const end) {
			output.resize(end - begin);
			float *out = output.data();
			for (const float *in = begin; in != end; ++in, ++out) {
				const float value
					= b2*state[0] + b1*state[1] + b0*(*in)
					- a2*state[2] - a1*state[3];
				// Shift the history: drop the oldest input and output, append the newest ones.
				state[0] = state[1];
				state[1] = *in;
				state[2] = state[3];
				state[3] = value;
				*out = value;
			}
		}
	};

	/*
		Computes the root mean square of the samples in [begin, end).
		An empty range yields 0 rather than the NaN that dividing 0 by 0 would produce.
	*/
	float rms(const float *const begin, const float *const end) {
		if (begin == end) return 0.f;
		float sum = 0;
		for (const float *sample = begin; sample != end; ++sample) {
			const float value = *sample;
			sum += value * value;
		}
		return std::sqrt(sum / static_cast<float>(end - begin));
	}

	int main(int argc, char **argv) {
	if (argc < 2) {
	std::cerr << "Usage: " << argv[0] << " <Input audio file> [Output image file]\n";
	return 1;
	}
	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
	const auto path = std::filesystem::absolute(std::filesystem::path(converter.from_bytes(argv[1])));
	if (!std::filesystem::exists(path)) {
	std::cerr << "The specified file does not exist.\n";
	return 2;
	}
	std::vector<char> fileData(std::filesystem::file_size(path));
	{
	std::ifstream file(path, std::ios::binary);
	file.read(fileData.data(), fileData.size());
	}
	std::vector<float> samples;
	unsigned channelCount, sampleRate;
	drflac_uint64 frameCount;
	{
	float *rawSamples;
	if (path.extension() == ".flac") {
	rawSamples = drflac_open_memory_and_read_pcm_frames_f32(
	fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
	);
	} else if (path.extension() == ".mp3") {
	drmp3_config config;
	rawSamples = drmp3_open_memory_and_read_pcm_frames_f32(
	fileData.data(), fileData.size(), &config, &frameCount, nullptr
	);
	channelCount = config.channels;
	sampleRate = config.sampleRate;
	} else if (path.extension() == ".wav") {
	rawSamples = drwav_open_memory_and_read_pcm_frames_f32(
	fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
	);
	} else {
	std::cerr << "Supported file formats are FLAC, MP3 and WAV.\n";
	return 3;
	}
	samples.resize(frameCount);
	for (drflac_uint64 i = 0; i != frameCount; ++i) samples[i] = rawSamples[i * channelCount];
	drflac_free(rawSamples, nullptr);
	}

	Filter
	filterL = Filter::lowPass(sampleRate, 200),
	filterML = Filter::highPass(sampleRate, 200),
	filterMH = Filter::lowPass(sampleRate, 3000),
	filterH = Filter::highPass(sampleRate, 3000);
	const int samplesPerPixel = sampleRate / pixelsPerSecond;
	const int
	width = (frameCount + samplesPerPixel - 1) / samplesPerPixel,
	height = waveformHeight + padding*2;
	std::vector<unsigned char> image(width * height * 3);
	std::vector<int> columnData(waveformHeight);
	int x = 0, previousPosition = waveformHeight / 2;
	for (int index = 0; index <= frameCount; ++index) {
	const int newX = index / samplesPerPixel;
	if (newX != x \|\| index == frameCount) {
	const float const start = samples.data() + x samplesPerPixel;
	const float const end = samples.data() + std::min(static_cast<int>(frameCount), (x+1) samplesPerPixel);
	filterL.apply(start, end);
	filterML.apply(start, end);
	filterMH.apply(&filterML.output.begin(), &filterML.output.end());
	filterH.apply(start, end);
	float
	loudnessL = rms(&filterL.output.begin(), &filterL.output.end()),
	loudnessM = rms(&filterMH.output.begin(), &filterMH.output.end()),
	loudnessH = rms(&filterH.output.begin(), &filterH.output.end());
	const float maxLoudness = std::max({loudnessL, loudnessM, loudnessH});
	if (maxLoudness == 0.f) {
	loudnessL = 1.f;
	loudnessM = 1.f;
	loudnessH = 1.f;
	} else {
	loudnessL /= maxLoudness;
	loudnessM /= maxLoudness;
	loudnessH /= maxLoudness;
	}
	const float
	loudness = rms(start, end),
	baseMultiplier = 255.f * std::min(1.f, loudness * waveformHeight),
	rgInfluence = std::max(0.f, loudnessH - 1.f/3.f) / (2.f/3.f),
	baseR = std::max(0.f, loudnessL * (1.164f - 0.164frgInfluence) (1.f - loudnessM*0.28f)),
	baseG = std::max(0.f, loudnessM * (1.087f - 0.087frgInfluence) (1.f - loudnessL*0.33f)),
	baseB = std::max(0.f, loudnessH * (3.6f - loudnessL1.2f - loudnessM0.8f)),
	brightest = *std::max_element(columnData.begin(), columnData.end());
	for (int y = 0; y != waveformHeight; ++y) {
	const float multiplier = std::min(1.f, columnData[y] / brightest * brightnessBoost);
	unsigned char const pixel = image.data() + ((waveformHeight - 1 - y + padding) width + x) * 3;
	pixel[0] = baseMultiplier * std::min(1.f, multiplier * baseR);
	pixel[1] = baseMultiplier * std::min(1.f, multiplier * baseG);
	pixel[2] = baseMultiplier * std::min(1.f, multiplier * baseB);
	}
	std::fill(columnData.begin(), columnData.end(), 0);
	x = newX;
	}
	if (index == frameCount) break;
	const int
	position = (samples[index] + 1.f) / 2.f * waveformHeight,
	start = std::min(previousPosition, position),
	end = std::max(previousPosition, position);
	for (int y = start; y <= end; ++y) columnData[y] += 1;
	previousPosition = position;
	}
	std::vector<unsigned char> pngData;
	lodepng::encode(pngData, image, width, height, LCT_RGB);
	std::ofstream imageFile(
	argc > 2 ? std::filesystem::path(converter.from_bytes(argv[2])) : path.stem().concat(".png"),
	std::ios::binary
	);
	imageFile.write(reinterpret_cast<char*>(pngData.data()), pngData.size());
	}