Skip to content

Instantly share code, notes, and snippets.

@phoboslab
Created April 23, 2024 10:16
Show Gist options
  • Save phoboslab/2f9fea17218f7864cf9f2652688bf12a to your computer and use it in GitHub Desktop.
Save phoboslab/2f9fea17218f7864cf9f2652688bf12a to your computer and use it in GitHub Desktop.
/*
Copyright (c) 2024, Dominic Szablewski - https://phoboslab.org
SPDX-License-Identifier: MIT
Compile with:
gcc l555.c -lm -O3 -o l555
PCM WAV to L555 MWV:
./l555 in.wav out.mwv
L555 MWV to PCM WAV
./l555 in.mwv out.wav
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define ABORT(...) \
printf("Abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); \
printf("\n"); \
exit(1)
#define ASSERT(TEST, ...) \
if (!(TEST)) { \
ABORT(__VA_ARGS__); \
}
#define STR_ENDS_WITH(S, E) (strcmp(S + strlen(S) - (sizeof(E)-1), E) == 0)
typedef struct {
unsigned int channels;
unsigned int samplerate;
unsigned int samples_count;
double error;
} samples_desc_t;
#define WAV_CHUNK_ID(S) \
(((unsigned int)(S[3])) << 24 | ((unsigned int)(S[2])) << 16 | \
((unsigned int)(S[1])) << 8 | ((unsigned int)(S[0])))
#define L555_FILTER_ORDER 3
#define L555_MAX_FILTERS 32
void fwrite_u32_le(unsigned int v, FILE *fh) {
unsigned char buf[sizeof(unsigned int)];
buf[0] = 0xff & (v );
buf[1] = 0xff & (v >> 8);
buf[2] = 0xff & (v >> 16);
buf[3] = 0xff & (v >> 24);
int wrote = fwrite(buf, sizeof(unsigned int), 1, fh);
ASSERT(wrote, "Write error");
}
void fwrite_u16_le(unsigned short v, FILE *fh) {
unsigned char buf[sizeof(unsigned short)];
buf[0] = 0xff & (v );
buf[1] = 0xff & (v >> 8);
int wrote = fwrite(buf, sizeof(unsigned short), 1, fh);
ASSERT(wrote, "Write error");
}
unsigned int fread_u32_le(FILE *fh) {
unsigned char buf[sizeof(unsigned int)];
int read = fread(buf, sizeof(unsigned int), 1, fh);
ASSERT(read, "Read error or unexpected end of file");
return (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
}
unsigned short fread_u16_le(FILE *fh) {
unsigned char buf[sizeof(unsigned short)];
int read = fread(buf, sizeof(unsigned short), 1, fh);
ASSERT(read, "Read error or unexpected end of file");
return (buf[1] << 8) | buf[0];
}
static inline int clamp(int v, int min, int max) {
if (v < min) { return min; }
if (v > max) { return max; }
return v;
}
static inline int sign_extend_4(int x) {
return (x & 7) - (x & 8);
}
typedef struct {
short a, b, c;
} l555_hist_t;
typedef struct {
char r[32];
} l555_residuals_t;
static const int l555_scalefactors[32] = {
4096, 5198, 6597, 8372, 10625, 13484, 17113, 21718,
27563, 34980, 44393, 56339, 71500, 90741, 115160, 146149,
185478, 235390, 298734, 379123, 481145, 610622, 774940, 983477,
1248130, 1584003, 2010258, 2551218, 3237751, 4109030, 5214770, 6618065
};
static const int l555_default_coeffs[32][3] = {
// These are the same 4 fixed predictors as used by FLAC,
// represented in .12 fixed point.
{ 0, 0, 0}, // s_0(t) = 0
{ -4096, 0, 0}, // s_1(t) = s(t-1)
{ -8192, 4096, 0}, // s_2(t) = 2s(t-1) - s(t-2)
{-12288, 12288, -4096}, // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3)
// Unused...
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0},
{ 0, 0, 0}
};
unsigned char *l555_encode(short *sample_data, unsigned int samples_len, int coeffs[32][3], unsigned int *encoded_size, double *total_error) {
int frame_count = samples_len / 32;
*encoded_size = frame_count * 18;
unsigned char *encoded_data = malloc(*encoded_size);
ASSERT(encoded_data, "Malloc for %d bytes failed", *encoded_size);
int data_p = 0;
memcpy(coeffs, l555_default_coeffs, sizeof(l555_default_coeffs));
l555_hist_t hist = {0};
for (int f = 0; f < frame_count; f++) {
l555_residuals_t best_residuals = {0};
l555_hist_t best_hist = {0};
double best_error = INFINITY;
int best_cf = 0;
int best_sf_neg = 0;
int best_sf_pos = 0;
// Brute force the first 4 coeffs; doing all 32 makes this whole thing
// even slower and only marginally improves quality.
for (int cf = 0; cf < 4; cf++) {
// Brute forcing the negative and positive scalefactors this way
// is kinda pointless, as they are usually very close together.
// For better performance:
// 1. use the same sf for pos and neg
// 2. start with the highest sf and go down to zero; stop as soon as
// as the cur_error is worse than best_error
for (int sf_neg = 0; sf_neg < 32; sf_neg++) {
for (int sf_pos = 0; sf_pos < 32; sf_pos++) {
l555_residuals_t cur_residuals;
l555_hist_t cur_hist = hist;
double cur_error = 0;
for (int si = 0; si < 32; si++) {
int predicted =
cur_hist.a * coeffs[cf][0] +
cur_hist.b * coeffs[cf][1] +
cur_hist.c * coeffs[cf][2];
int sample = sample_data[f * 32 + si];
int sf_enc = l555_scalefactors[sample >= 0 ? sf_pos : sf_neg];
// Dividing by float and rounding the result is vital for better quality!
// int encoded = clamp(((sample << 12) + predicted) / sf_enc, -8, 7);
int encoded = clamp(round(((sample << 12) + predicted) / (float)sf_enc), -8, 7);
cur_residuals.r[si] = encoded;
// Careful, we might use a different sf for decoding than for
// encoding, if the sample was negative, but rounded to zero!
int sf_dec = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
int decoded = clamp((encoded * sf_dec - predicted) >> 12, -32768, 32767);
cur_hist.c = cur_hist.b;
cur_hist.b = cur_hist.a;
cur_hist.a = decoded;
double err = sample - decoded;
cur_error += err * err;
}
if (cur_error < best_error) {
best_error = cur_error;
best_cf = cf;
best_sf_neg = sf_neg;
best_sf_pos = sf_pos;
best_hist = cur_hist;
best_residuals = cur_residuals;
}
}
}
}
// Wrtie frame with the best residuals
printf("frame: %6d, cf: %2d, sf_neg: %2d, sf_pos: %2d\n", f, best_cf, best_sf_neg, best_sf_pos);
unsigned short frame_header = (best_sf_neg) | (best_sf_pos << 5) | (best_cf << 10);
encoded_data[data_p++] = (frame_header >> 0) & 0xff;
encoded_data[data_p++] = (frame_header >> 8) & 0xff;
for (int s = 0; s < 32; s+=2) {
encoded_data[data_p++] =
((best_residuals.r[s] & 0xf) << 4) |
(best_residuals.r[s+1] & 0xf);
}
*total_error += best_error;
hist = best_hist;
}
return encoded_data;
}
short *l555_decode(unsigned char *encoded_data, unsigned int encoded_size, int coeffs[32][3], unsigned int *wav_size) {
int frame_count = encoded_size / 18;
*wav_size = frame_count * 32 * 2;
short *sample_data = malloc(*wav_size);
ASSERT(sample_data, "Malloc for %d bytes failed", *wav_size);
l555_hist_t hist = {0};
int p = 0;
int so = 0;
for (int f = 0; f < frame_count; f++) {
unsigned short frame_header = encoded_data[p] | (encoded_data[p+1] << 8);
p += 2;
unsigned int sf_neg = (frame_header >> 0) & 0x1f;
unsigned int sf_pos = (frame_header >> 5) & 0x1f;
unsigned int cf = (frame_header >> 10) & 0x1f;
for (int si = 0; si < 32; si++) {
int predicted =
hist.a * coeffs[cf][0] +
hist.b * coeffs[cf][1] +
hist.c * coeffs[cf][2];
int nibble = (si & 1) ? (encoded_data[p++] & 0xf) : ((encoded_data[p] >> 4) & 0xf);
int encoded = sign_extend_4(nibble);
int sf = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
short sample = clamp((encoded * sf - predicted) >> 12, -32768, 32767);
sample_data[so++] = sample;
hist.c = hist.b;
hist.b = hist.a;
hist.a = sample;
}
}
return sample_data;
}
short *wav_read(const char *path, samples_desc_t *desc) {
FILE *fh = fopen(path, "rb");
ASSERT(fh, "Can't open %s for reading", path);
unsigned int container_type = fread_u32_le(fh);
ASSERT(container_type == WAV_CHUNK_ID("RIFF"), "Not a RIFF container");
unsigned int file_size = fread_u32_le(fh);
unsigned int wavid = fread_u32_le(fh);
ASSERT(wavid == WAV_CHUNK_ID("WAVE"), "No WAVE id found");
unsigned int data_size = 0;
unsigned int format_length = 0;
unsigned int format_type = 0;
unsigned int channels = 0;
unsigned int samplerate = 0;
unsigned int byte_rate = 0;
unsigned int block_align = 0;
unsigned int bits_per_sample = 0;
int l555_coeffs[32][3] = {0};
/* Find the fmt, pflt and data chunk, skip all others */
while (1) {
unsigned int chunk_type = fread_u32_le(fh);
unsigned int chunk_size = fread_u32_le(fh);
printf("chunk type %c%c%c%c\n", (chunk_type >> 0) & 0xff, (chunk_type >> 8) & 0xff, (chunk_type >> 16) & 0xff, (chunk_type >> 24) & 0xff);
if (chunk_type == WAV_CHUNK_ID("fmt ")) {
ASSERT(chunk_size == 16 || chunk_size == 18, "WAV fmt chunk size missmatch");
format_type = fread_u16_le(fh);
channels = fread_u16_le(fh);
samplerate = fread_u32_le(fh);
byte_rate = fread_u32_le(fh);
block_align = fread_u16_le(fh);
bits_per_sample = fread_u16_le(fh);
if (chunk_size == 18) {
unsigned short extra_params = fread_u16_le(fh);
ASSERT(extra_params == 0, "WAV fmt extra params not supported");
}
}
else if (chunk_type == WAV_CHUNK_ID("pflt ")) {
unsigned int filter_order = fread_u32_le(fh);
unsigned int filter_count = fread_u32_le(fh);
ASSERT(filter_order == 3, "Invalid filter order %d for format l5", filter_order);
unsigned int expected_chunk_size = 32 * 3 * 4 + 8;
ASSERT(chunk_size == expected_chunk_size, "Invalid chunk size %d for format l5", chunk_size);
// Apparently we always have 32*3 coeffs in this chunk, regardless
// of the filter_count.
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 3; j++) {
l555_coeffs[i][j] = fread_u32_le(fh);
}
}
}
else if (chunk_type == WAV_CHUNK_ID("data")) {
data_size = chunk_size;
break;
}
else {
int seek_result = fseek(fh, chunk_size, SEEK_CUR);
ASSERT(seek_result == 0, "Malformed RIFF header");
}
}
ASSERT(bits_per_sample == 16, "Bits per samples != 16");
ASSERT(data_size, "No data chunk");
short *sample_data;
unsigned int wav_size;
if (format_type == 1) {
wav_size = data_size;
sample_data = malloc(wav_size);
ASSERT(sample_data, "Malloc for %d bytes failed", data_size);
int read = fread(sample_data, data_size, 1, fh);
ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);
}
else if (format_type == 0x555) {
unsigned char *l555_data = malloc(data_size);
ASSERT(l555_data, "Malloc for %d bytes failed", data_size);
int read = fread(l555_data, data_size, 1, fh);
ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);
sample_data = l555_decode(l555_data, data_size, l555_coeffs, &wav_size);
free(l555_data);
}
else {
ABORT("Unknown format type %d", format_type);
}
fclose(fh);
desc->samplerate = samplerate;
desc->samples_count = wav_size / (channels * (bits_per_sample/8));
desc->channels = channels;
return sample_data;
}
int wav_write(const char *path, short *sample_data, samples_desc_t *desc) {
unsigned int data_size = desc->samples_count * desc->channels * sizeof(short);
unsigned int samplerate = desc->samplerate;
short bits_per_sample = 16;
short channels = desc->channels;
FILE *fh = fopen(path, "wb");
ASSERT(fh, "Can't open %s for writing", path);
fwrite("RIFF", 1, 4, fh);
fwrite_u32_le(data_size + 44 - 8, fh);
fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
fwrite_u16_le(0x0001, fh); // format_type
fwrite_u16_le(channels, fh);
fwrite_u32_le(samplerate, fh);
fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
fwrite_u16_le(channels * bits_per_sample/8, fh);
fwrite_u16_le(bits_per_sample, fh);
fwrite("data", 1, 4, fh);
fwrite_u32_le(data_size, fh);
fwrite((void*)sample_data, data_size, 1, fh);
fclose(fh);
return data_size + 44 - 8;
}
int mwv_write(const char *path, short *sample_data, samples_desc_t *desc) {
unsigned int samplerate = desc->samplerate;
short bits_per_sample = 16;
short channels = desc->channels;
desc->error = 0;
int l555_coeffs[32][3] = {0};
unsigned int data_size = 0;
unsigned char *encoded_data = l555_encode(sample_data, desc->samples_count, l555_coeffs, &data_size, &desc->error);
FILE *fh = fopen(path, "wb");
ASSERT(fh, "Can't open %s for writing", path);
fwrite("RIFF", 1, 4, fh);
fwrite_u32_le(data_size + 44 - 8, fh);
fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
fwrite_u16_le(0x0555, fh); // format_type
fwrite_u16_le(channels, fh);
fwrite_u32_le(samplerate, fh);
fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
fwrite_u16_le(channels * bits_per_sample/8, fh);
fwrite_u16_le(bits_per_sample, fh);
fwrite("pflt", 1, 4, fh);
fwrite_u32_le(32 * 3 * 4 + 8, fh); // chunk size
fwrite_u32_le(3, fh); // filter_order
fwrite_u32_le(32, fh); // filter_count. fixme, maybe? We have to store all 32 filters anyway
fwrite((void*)l555_coeffs, sizeof(l555_coeffs), 1, fh);
fwrite("data", 1, 4, fh);
fwrite_u32_le(data_size, fh);
fwrite((void*)encoded_data, data_size, 1, fh);
fclose(fh);
return data_size + 44 - 8;
}
int main(int argc, char **argv) {
ASSERT(argc >= 3, "\nUsage: qoaconv in.{wav,mp3,flac,qoa} out.{wav,qoa}")
samples_desc_t desc;
short *sample_data = NULL;
/* Decode input */
if (STR_ENDS_WITH(argv[1], ".wav") || STR_ENDS_WITH(argv[1], ".mwv")) {
sample_data = wav_read(argv[1], &desc);
}
else {
ABORT("Unknown file type for %s", argv[1]);
}
ASSERT(sample_data, "Can't load/decode %s", argv[1]);
printf(
"%s: channels: %d, samplerate: %d hz, samples per channel: %d, duration: %d sec\n",
argv[1], desc.channels, desc.samplerate, desc.samples_count, desc.samples_count/desc.samplerate
);
/* Encode output */
int bytes_written = 0;
double psnr = INFINITY;
if (STR_ENDS_WITH(argv[2], ".wav")) {
bytes_written = wav_write(argv[2], sample_data, &desc);
}
else if (STR_ENDS_WITH(argv[2], ".mwv")) {
bytes_written = mwv_write(argv[2], sample_data, &desc);
psnr = -20.0 * log10(sqrt(desc.error/(desc.samples_count * desc.channels)) / 32768.0);
}
else {
ABORT("Unknown file type for %s", argv[2]);
}
ASSERT(bytes_written, "Can't write/encode %s", argv[2]);
free(sample_data);
printf(
"%s: size: %d kb (%d bytes) = %.2f kbit/s, psnr: %.2f db\n",
argv[2], bytes_written/1024, bytes_written,
(((float)bytes_written*8)/((float)desc.samples_count/(float)desc.samplerate))/1024, psnr
);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment