Last active
December 6, 2024 11:31
-
-
Save leduyquang753/a6ec0f3f5a5117223480c8cb6141341c to your computer and use it in GitHub Desktop.
Colored audio waveform generator based on band splitting.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Colored audio waveform generator.
The audio is split into 3 bands, and each band's intensity is assigned to a color channel as follows:
– Under 200 Hz: red.
– From 200 Hz to 3000 Hz: green.
– Over 3000 Hz: blue.
The audio file's path is passed as the first command-line argument, optionally followed by the output image's path.
Dependencies:
– dr_libs: https://github.com/mackron/dr_libs
– LodePNG: https://github.com/lvandeve/lodepng
*/
#include <algorithm>
#include <array>
#include <cmath>
#include <codecvt>
#include <cstddef>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <ios>
#include <iostream>
#include <limits>
#include <locale>
#include <string>
#include <vector>

#define DR_FLAC_IMPLEMENTATION
#define DR_MP3_IMPLEMENTATION
#define DR_WAV_IMPLEMENTATION
#include "dr_flac.h"
#include "dr_mp3.h"
#include "dr_wav.h"
#include "lodepng.h"
// Used to turn cutoff frequencies into angular frequencies when computing filter coefficients.
constexpr float pi = 3.14159265358979f;
// Horizontal resolution: each image column covers `sampleRate / pixelsPerSecond` samples.
constexpr int pixelsPerSecond = 53;
// Number of pixel rows occupied by the waveform itself.
constexpr int waveformHeight = 69;
// Number of blank pixel rows left above and below the waveform.
constexpr int padding = 16;
// Factor applied to a row's relative brightness within its column before it is capped at 1.
constexpr float brightnessBoost = 1.25f;
struct Filter { | |
float a1, a2, b0, b1, b2; | |
std::array<float, 4> state = {}; | |
std::vector<float> output; | |
Filter( | |
const float a0, const float a1, const float a2, const float b0, const float b1, const float b2 | |
): a1(a1/a0), a2(a2/a0), b0(b0/a0), b1(b1/a0), b2(b2/a0) {} | |
static Filter lowPass(const float sampleRate, const float frequency) { | |
const float | |
w0 = 2.f * pi * frequency / sampleRate, | |
cosw0 = std::cos(w0), | |
alpha = std::sin(w0) / std::sqrt(2.f); | |
return { | |
1.f + alpha, -2.f * cosw0, 1.f - alpha, | |
(1.f - cosw0) / 2.f, 1.f - cosw0, (1.f - cosw0) / 2.f | |
}; | |
} | |
static Filter highPass(const float sampleRate, const float frequency) { | |
const float | |
w0 = 2.f * pi * frequency / sampleRate, | |
cosw0 = std::cos(w0), | |
alpha = std::sin(w0) / std::sqrt(2.f); | |
return { | |
1.f + alpha, -2.f * cosw0, 1.f - alpha, | |
(1.f + cosw0) / 2.f, -1.f - cosw0, (1.f + cosw0) / 2.f | |
}; | |
} | |
void apply(const float *const begin, const float *const end) { | |
output.resize(end - begin); | |
float *out = output.data(); | |
for (const float *in = begin; in != end; ++in, ++out) { | |
const float value | |
= b2*state[0] + b1*state[1] + b0*(*in) | |
- a2*state[2] - a1*state[3]; | |
state[0] = state[1]; | |
state[1] = *in; | |
state[2] = state[3]; | |
state[3] = value; | |
*out = value; | |
} | |
} | |
}; | |
/*
Returns the root mean square of the samples in [begin, end).
An empty range yields 0 rather than the NaN that dividing by a zero sample count would produce.
*/
float rms(const float *const begin, const float *const end) {
    if (begin == end) return 0.f;
    float sum = 0.f;
    for (const float *sample = begin; sample != end; ++sample) {
        const float value = *sample;
        sum += value * value;
    }
    return std::sqrt(sum / static_cast<float>(end - begin));
}
int main(int argc, char **argv) { | |
if (argc < 2) { | |
std::cerr << "Usage: " << argv[0] << " <Input audio file> [Output image file]\n"; | |
return 1; | |
} | |
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter; | |
const auto path = std::filesystem::absolute(std::filesystem::path(converter.from_bytes(argv[1]))); | |
if (!std::filesystem::exists(path)) { | |
std::cerr << "The specified file does not exist.\n"; | |
return 2; | |
} | |
std::vector<char> fileData(std::filesystem::file_size(path)); | |
{ | |
std::ifstream file(path, std::ios::binary); | |
file.read(fileData.data(), fileData.size()); | |
} | |
std::vector<float> samples; | |
unsigned channelCount, sampleRate; | |
drflac_uint64 frameCount; | |
{ | |
float *rawSamples; | |
if (path.extension() == ".flac") { | |
rawSamples = drflac_open_memory_and_read_pcm_frames_f32( | |
fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr | |
); | |
} else if (path.extension() == ".mp3") { | |
drmp3_config config; | |
rawSamples = drmp3_open_memory_and_read_pcm_frames_f32( | |
fileData.data(), fileData.size(), &config, &frameCount, nullptr | |
); | |
channelCount = config.channels; | |
sampleRate = config.sampleRate; | |
} else if (path.extension() == ".wav") { | |
rawSamples = drwav_open_memory_and_read_pcm_frames_f32( | |
fileData.data(), fileData.size(), &channelCount, &sampleRate, &frameCount, nullptr | |
); | |
} else { | |
std::cerr << "Supported file formats are FLAC, MP3 and WAV.\n"; | |
return 3; | |
} | |
samples.resize(frameCount); | |
for (drflac_uint64 i = 0; i != frameCount; ++i) samples[i] = rawSamples[i * channelCount]; | |
drflac_free(rawSamples, nullptr); | |
} | |
Filter | |
filterL = Filter::lowPass(sampleRate, 200), | |
filterML = Filter::highPass(sampleRate, 200), | |
filterMH = Filter::lowPass(sampleRate, 3000), | |
filterH = Filter::highPass(sampleRate, 3000); | |
const int samplesPerPixel = sampleRate / pixelsPerSecond; | |
const int | |
width = (frameCount + samplesPerPixel - 1) / samplesPerPixel, | |
height = waveformHeight + padding*2; | |
std::vector<unsigned char> image(width * height * 3); | |
std::vector<int> columnData(waveformHeight); | |
int x = 0, previousPosition = waveformHeight / 2; | |
for (int index = 0; index <= frameCount; ++index) { | |
const int newX = index / samplesPerPixel; | |
if (newX != x || index == frameCount) { | |
const float *const start = samples.data() + x * samplesPerPixel; | |
const float *const end = samples.data() + std::min(static_cast<int>(frameCount), (x+1) * samplesPerPixel); | |
filterL.apply(start, end); | |
filterML.apply(start, end); | |
filterMH.apply(&*filterML.output.begin(), &*filterML.output.end()); | |
filterH.apply(start, end); | |
float | |
loudnessL = rms(&*filterL.output.begin(), &*filterL.output.end()), | |
loudnessM = rms(&*filterMH.output.begin(), &*filterMH.output.end()), | |
loudnessH = rms(&*filterH.output.begin(), &*filterH.output.end()); | |
const float maxLoudness = std::max({loudnessL, loudnessM, loudnessH}); | |
if (maxLoudness == 0.f) { | |
loudnessL = 1.f; | |
loudnessM = 1.f; | |
loudnessH = 1.f; | |
} else { | |
loudnessL /= maxLoudness; | |
loudnessM /= maxLoudness; | |
loudnessH /= maxLoudness; | |
} | |
const float | |
loudness = rms(start, end), | |
baseMultiplier = 255.f * std::min(1.f, loudness * waveformHeight), | |
rgInfluence = std::max(0.f, loudnessH - 1.f/3.f) / (2.f/3.f), | |
baseR = std::max(0.f, loudnessL * (1.164f - 0.164f*rgInfluence) * (1.f - loudnessM*0.28f)), | |
baseG = std::max(0.f, loudnessM * (1.087f - 0.087f*rgInfluence) * (1.f - loudnessL*0.33f)), | |
baseB = std::max(0.f, loudnessH * (3.6f - loudnessL*1.2f - loudnessM*0.8f)), | |
brightest = *std::max_element(columnData.begin(), columnData.end()); | |
for (int y = 0; y != waveformHeight; ++y) { | |
const float multiplier = std::min(1.f, columnData[y] / brightest * brightnessBoost); | |
unsigned char *const pixel = image.data() + ((waveformHeight - 1 - y + padding) * width + x) * 3; | |
pixel[0] = baseMultiplier * std::min(1.f, multiplier * baseR); | |
pixel[1] = baseMultiplier * std::min(1.f, multiplier * baseG); | |
pixel[2] = baseMultiplier * std::min(1.f, multiplier * baseB); | |
} | |
std::fill(columnData.begin(), columnData.end(), 0); | |
x = newX; | |
} | |
if (index == frameCount) break; | |
const int | |
position = (samples[index] + 1.f) / 2.f * waveformHeight, | |
start = std::min(previousPosition, position), | |
end = std::max(previousPosition, position); | |
for (int y = start; y <= end; ++y) columnData[y] += 1; | |
previousPosition = position; | |
} | |
std::vector<unsigned char> pngData; | |
lodepng::encode(pngData, image, width, height, LCT_RGB); | |
std::ofstream imageFile( | |
argc > 2 ? std::filesystem::path(converter.from_bytes(argv[2])) : path.stem().concat(".png"), | |
std::ios::binary | |
); | |
imageFile.write(reinterpret_cast<char*>(pngData.data()), pngData.size()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment