Skip to content

Instantly share code, notes, and snippets.

@leduyquang753
Last active December 6, 2024 11:31
Show Gist options
  • Save leduyquang753/a6ec0f3f5a5117223480c8cb6141341c to your computer and use it in GitHub Desktop.
Save leduyquang753/a6ec0f3f5a5117223480c8cb6141341c to your computer and use it in GitHub Desktop.
Colored audio waveform generator based on band splitting.
/*
Colored audio waveform generator.
The audio is split into 3 bands and then their intensity is assigned to the color channels as follows:
– Under 200 Hz: red.
– From 200 Hz to 3000 Hz: green.
– Over 3000 Hz: blue.
The audio file's path is passed as a command-line argument.
Dependencies:
– dr_libs: https://github.com/mackron/dr_libs
– LodePNG: https://github.com/lvandeve/lodepng
*/
#include <algorithm>
#include <array>
#include <cmath>
#include <codecvt>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <ios>
#include <iostream>
#include <limits>
#include <locale>
#include <string>
#include <vector>
#define DR_FLAC_IMPLEMENTATION
#define DR_MP3_IMPLEMENTATION
#define DR_WAV_IMPLEMENTATION
#include "dr_flac.h"
#include "dr_mp3.h"
#include "dr_wav.h"
#include "lodepng.h"
// Single-precision pi, used to convert filter cutoff frequencies to angular frequencies.
constexpr float pi = 3.14159265358979f;
// Number of image columns generated per second of audio.
constexpr int pixelsPerSecond = 53;
// Height in pixels of the drawn waveform, excluding padding.
constexpr int waveformHeight = 69;
// Number of blank pixel rows left above and below the waveform.
constexpr int padding = 16;
// Factor applied to a pixel's relative density within its column before it is capped at full brightness.
constexpr float brightnessBoost = 1.25f;
/*
Second-order (biquad) recursive filter in direct form I.
The coefficients are stored already divided by a0, so each output sample is
	y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2].
The history is kept between calls to apply(), which lets a signal be filtered chunk by chunk.
*/
struct Filter {
	float a1, a2, b0, b1, b2;
	// History: {x[n-2], x[n-1], y[n-2], y[n-1]}.
	std::array<float, 4> state{};
	// Result of the most recent apply() call.
	std::vector<float> output;

	// Builds a filter from unnormalized coefficients; every coefficient is divided by a0.
	Filter(
		const float a0, const float a1, const float a2, const float b0, const float b1, const float b2
	): a1{a1/a0}, a2{a2/a0}, b0{b0/a0}, b1{b1/a0}, b2{b2/a0} {}

	// Creates a low-pass filter with the given cutoff frequency, both arguments in the same unit (Hz).
	// The coefficients have the form of the Audio EQ Cookbook low-pass with Q = 1/sqrt(2).
	static Filter lowPass(const float sampleRate, const float frequency) {
		const float w0 = 2.f * pi * frequency / sampleRate;
		const float cosw0 = std::cos(w0);
		const float alpha = std::sin(w0) / std::sqrt(2.f);
		const float edge = (1.f - cosw0) / 2.f;
		return Filter(
			1.f + alpha, -2.f * cosw0, 1.f - alpha,
			edge, 1.f - cosw0, edge
		);
	}

	// Creates a high-pass filter with the given cutoff frequency, both arguments in the same unit (Hz).
	// The coefficients have the form of the Audio EQ Cookbook high-pass with Q = 1/sqrt(2).
	static Filter highPass(const float sampleRate, const float frequency) {
		const float w0 = 2.f * pi * frequency / sampleRate;
		const float cosw0 = std::cos(w0);
		const float alpha = std::sin(w0) / std::sqrt(2.f);
		const float edge = (1.f + cosw0) / 2.f;
		return Filter(
			1.f + alpha, -2.f * cosw0, 1.f - alpha,
			edge, -1.f - cosw0, edge
		);
	}

	// Filters the samples in [begin, end) into `output`, continuing from the history left by the previous call.
	void apply(const float *const begin, const float *const end) {
		const std::size_t count = static_cast<std::size_t>(end - begin);
		output.resize(count);
		float &x2 = state[0], &x1 = state[1], &y2 = state[2], &y1 = state[3];
		for (std::size_t i = 0; i < count; ++i) {
			const float x0 = begin[i];
			const float y0
				= b2*x2 + b1*x1 + b0*x0
				- a2*y2 - a1*y1;
			// Shift the history by one sample.
			x2 = x1;
			x1 = x0;
			y2 = y1;
			y1 = y0;
			output[i] = y0;
		}
	}
};
/*
Computes the root mean square of the samples in [begin, end).
An empty range yields 0 instead of the NaN that dividing by a zero sample count would produce.
*/
float rms(const float *const begin, const float *const end) {
	if (begin == end) return 0.f;
	float sum = 0.f;
	for (const float *sample = begin; sample != end; ++sample) {
		const float value = *sample;
		sum += value * value;
	}
	return std::sqrt(sum / static_cast<float>(end - begin));
}
int main(int argc, char **argv) {
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <Input audio file> [Output image file]\n";
return 1;
}
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
const auto path = std::filesystem::absolute(std::filesystem::path(converter.from_bytes(argv[1])));
if (!std::filesystem::exists(path)) {
std::cerr << "The specified file does not exist.\n";
return 2;
}
std::vector<char> fileData(std::filesystem::file_size(path));
{
std::ifstream file(path, std::ios::binary);
file.read(fileData.data(), fileData.size());
}
std::vector<float> samples;
unsigned channelCount, sampleRate;
drflac_uint64 frameCount;
{
float *rawSamples;
if (path.extension() == ".flac") {
rawSamples = drflac_open_memory_and_read_pcm_frames_f32(
fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
);
} else if (path.extension() == ".mp3") {
drmp3_config config;
rawSamples = drmp3_open_memory_and_read_pcm_frames_f32(
fileData.data(), fileData.size(), &config, &frameCount, nullptr
);
channelCount = config.channels;
sampleRate = config.sampleRate;
} else if (path.extension() == ".wav") {
rawSamples = drwav_open_memory_and_read_pcm_frames_f32(
fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr
);
} else {
std::cerr << "Supported file formats are FLAC, MP3 and WAV.\n";
return 3;
}
samples.resize(frameCount);
for (drflac_uint64 i = 0; i != frameCount; ++i) samples[i] = rawSamples[i * channelCount];
drflac_free(rawSamples, nullptr);
}
Filter
filterL = Filter::lowPass(sampleRate, 200),
filterML = Filter::highPass(sampleRate, 200),
filterMH = Filter::lowPass(sampleRate, 3000),
filterH = Filter::highPass(sampleRate, 3000);
const int samplesPerPixel = sampleRate / pixelsPerSecond;
const int
width = (frameCount + samplesPerPixel - 1) / samplesPerPixel,
height = waveformHeight + padding*2;
std::vector<unsigned char> image(width * height * 3);
std::vector<int> columnData(waveformHeight);
int x = 0, previousPosition = waveformHeight / 2;
for (int index = 0; index <= frameCount; ++index) {
const int newX = index / samplesPerPixel;
if (newX != x || index == frameCount) {
const float *const start = samples.data() + x * samplesPerPixel;
const float *const end = samples.data() + std::min(static_cast<int>(frameCount), (x+1) * samplesPerPixel);
filterL.apply(start, end);
filterML.apply(start, end);
filterMH.apply(&*filterML.output.begin(), &*filterML.output.end());
filterH.apply(start, end);
float
loudnessL = rms(&*filterL.output.begin(), &*filterL.output.end()),
loudnessM = rms(&*filterMH.output.begin(), &*filterMH.output.end()),
loudnessH = rms(&*filterH.output.begin(), &*filterH.output.end());
const float maxLoudness = std::max({loudnessL, loudnessM, loudnessH});
if (maxLoudness == 0.f) {
loudnessL = 1.f;
loudnessM = 1.f;
loudnessH = 1.f;
} else {
loudnessL /= maxLoudness;
loudnessM /= maxLoudness;
loudnessH /= maxLoudness;
}
const float
loudness = rms(start, end),
baseMultiplier = 255.f * std::min(1.f, loudness * waveformHeight),
rgInfluence = std::max(0.f, loudnessH - 1.f/3.f) / (2.f/3.f),
baseR = std::max(0.f, loudnessL * (1.164f - 0.164f*rgInfluence) * (1.f - loudnessM*0.28f)),
baseG = std::max(0.f, loudnessM * (1.087f - 0.087f*rgInfluence) * (1.f - loudnessL*0.33f)),
baseB = std::max(0.f, loudnessH * (3.6f - loudnessL*1.2f - loudnessM*0.8f)),
brightest = *std::max_element(columnData.begin(), columnData.end());
for (int y = 0; y != waveformHeight; ++y) {
const float multiplier = std::min(1.f, columnData[y] / brightest * brightnessBoost);
unsigned char *const pixel = image.data() + ((waveformHeight - 1 - y + padding) * width + x) * 3;
pixel[0] = baseMultiplier * std::min(1.f, multiplier * baseR);
pixel[1] = baseMultiplier * std::min(1.f, multiplier * baseG);
pixel[2] = baseMultiplier * std::min(1.f, multiplier * baseB);
}
std::fill(columnData.begin(), columnData.end(), 0);
x = newX;
}
if (index == frameCount) break;
const int
position = (samples[index] + 1.f) / 2.f * waveformHeight,
start = std::min(previousPosition, position),
end = std::max(previousPosition, position);
for (int y = start; y <= end; ++y) columnData[y] += 1;
previousPosition = position;
}
std::vector<unsigned char> pngData;
lodepng::encode(pngData, image, width, height, LCT_RGB);
std::ofstream imageFile(
argc > 2 ? std::filesystem::path(converter.from_bytes(argv[2])) : path.stem().concat(".png"),
std::ios::binary
);
imageFile.write(reinterpret_cast<char*>(pngData.data()), pngData.size());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment