Created
April 23, 2024 10:16
-
-
Save phoboslab/2f9fea17218f7864cf9f2652688bf12a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Copyright (c) 2024, Dominic Szablewski - https://phoboslab.org | |
SPDX-License-Identifier: MIT | |
Compile with: | |
gcc l555.c -lm -O3 -o l555 | |
PCM WAV to L555 MWV: | |
./l555 in.wav out.mwv | |
L555 MWV to PCM WAV | |
./l555 in.mwv out.wav | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#define STRINGIFY(x) #x | |
#define TOSTRING(x) STRINGIFY(x) | |
#define ABORT(...) \ | |
printf("Abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); \ | |
printf("\n"); \ | |
exit(1) | |
#define ASSERT(TEST, ...) \ | |
if (!(TEST)) { \ | |
ABORT(__VA_ARGS__); \ | |
} | |
#define STR_ENDS_WITH(S, E) (strcmp(S + strlen(S) - (sizeof(E)-1), E) == 0) | |
typedef struct { | |
unsigned int channels; | |
unsigned int samplerate; | |
unsigned int samples_count; | |
double error; | |
} samples_desc_t; | |
#define WAV_CHUNK_ID(S) \ | |
(((unsigned int)(S[3])) << 24 | ((unsigned int)(S[2])) << 16 | \ | |
((unsigned int)(S[1])) << 8 | ((unsigned int)(S[0]))) | |
#define L555_FILTER_ORDER 3 | |
#define L555_MAX_FILTERS 32 | |
void fwrite_u32_le(unsigned int v, FILE *fh) { | |
unsigned char buf[sizeof(unsigned int)]; | |
buf[0] = 0xff & (v ); | |
buf[1] = 0xff & (v >> 8); | |
buf[2] = 0xff & (v >> 16); | |
buf[3] = 0xff & (v >> 24); | |
int wrote = fwrite(buf, sizeof(unsigned int), 1, fh); | |
ASSERT(wrote, "Write error"); | |
} | |
void fwrite_u16_le(unsigned short v, FILE *fh) { | |
unsigned char buf[sizeof(unsigned short)]; | |
buf[0] = 0xff & (v ); | |
buf[1] = 0xff & (v >> 8); | |
int wrote = fwrite(buf, sizeof(unsigned short), 1, fh); | |
ASSERT(wrote, "Write error"); | |
} | |
unsigned int fread_u32_le(FILE *fh) { | |
unsigned char buf[sizeof(unsigned int)]; | |
int read = fread(buf, sizeof(unsigned int), 1, fh); | |
ASSERT(read, "Read error or unexpected end of file"); | |
return (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; | |
} | |
unsigned short fread_u16_le(FILE *fh) { | |
unsigned char buf[sizeof(unsigned short)]; | |
int read = fread(buf, sizeof(unsigned short), 1, fh); | |
ASSERT(read, "Read error or unexpected end of file"); | |
return (buf[1] << 8) | buf[0]; | |
} | |
static inline int clamp(int v, int min, int max) { | |
if (v < min) { return min; } | |
if (v > max) { return max; } | |
return v; | |
} | |
static inline int sign_extend_4(int x) { | |
return (x & 7) - (x & 8); | |
} | |
typedef struct { | |
short a, b, c; | |
} l555_hist_t; | |
typedef struct { | |
char r[32]; | |
} l555_residuals_t; | |
static const int l555_scalefactors[32] = { | |
4096, 5198, 6597, 8372, 10625, 13484, 17113, 21718, | |
27563, 34980, 44393, 56339, 71500, 90741, 115160, 146149, | |
185478, 235390, 298734, 379123, 481145, 610622, 774940, 983477, | |
1248130, 1584003, 2010258, 2551218, 3237751, 4109030, 5214770, 6618065 | |
}; | |
static const int l555_default_coeffs[32][3] = { | |
// These are the same 4 fixed predictors as used by FLAC, | |
// represented in .12 fixed point. | |
{ 0, 0, 0}, // s_0(t) = 0 | |
{ -4096, 0, 0}, // s_1(t) = s(t-1) | |
{ -8192, 4096, 0}, // s_2(t) = 2s(t-1) - s(t-2) | |
{-12288, 12288, -4096}, // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3) | |
// Unused... | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0}, | |
{ 0, 0, 0} | |
}; | |
unsigned char *l555_encode(short *sample_data, unsigned int samples_len, int coeffs[32][3], unsigned int *encoded_size, double *total_error) { | |
int frame_count = samples_len / 32; | |
*encoded_size = frame_count * 18; | |
unsigned char *encoded_data = malloc(*encoded_size); | |
ASSERT(encoded_data, "Malloc for %d bytes failed", *encoded_size); | |
int data_p = 0; | |
memcpy(coeffs, l555_default_coeffs, sizeof(l555_default_coeffs)); | |
l555_hist_t hist = {0}; | |
for (int f = 0; f < frame_count; f++) { | |
l555_residuals_t best_residuals = {0}; | |
l555_hist_t best_hist = {0}; | |
double best_error = INFINITY; | |
int best_cf = 0; | |
int best_sf_neg = 0; | |
int best_sf_pos = 0; | |
// Brute force the first 4 coeffs; doing all 32 makes this whole thing | |
// even slower and only marginally improves quality. | |
for (int cf = 0; cf < 4; cf++) { | |
// Brute forcing the negative and positive scalefactors this way | |
// is kinda pointless, as they are usually very close together. | |
// For better performance: | |
// 1. use the same sf for pos and neg | |
// 2. start with the highest sf and go down to zero; stop as soon as | |
// as the cur_error is worse than best_error | |
for (int sf_neg = 0; sf_neg < 32; sf_neg++) { | |
for (int sf_pos = 0; sf_pos < 32; sf_pos++) { | |
l555_residuals_t cur_residuals; | |
l555_hist_t cur_hist = hist; | |
double cur_error = 0; | |
for (int si = 0; si < 32; si++) { | |
int predicted = | |
cur_hist.a * coeffs[cf][0] + | |
cur_hist.b * coeffs[cf][1] + | |
cur_hist.c * coeffs[cf][2]; | |
int sample = sample_data[f * 32 + si]; | |
int sf_enc = l555_scalefactors[sample >= 0 ? sf_pos : sf_neg]; | |
// Dividing by float and rounding the result is vital for better quality! | |
// int encoded = clamp(((sample << 12) + predicted) / sf_enc, -8, 7); | |
int encoded = clamp(round(((sample << 12) + predicted) / (float)sf_enc), -8, 7); | |
cur_residuals.r[si] = encoded; | |
// Careful, we might use a different sf for decoding than for | |
// encoding, if the sample was negative, but rounded to zero! | |
int sf_dec = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg]; | |
int decoded = clamp((encoded * sf_dec - predicted) >> 12, -32768, 32767); | |
cur_hist.c = cur_hist.b; | |
cur_hist.b = cur_hist.a; | |
cur_hist.a = decoded; | |
double err = sample - decoded; | |
cur_error += err * err; | |
} | |
if (cur_error < best_error) { | |
best_error = cur_error; | |
best_cf = cf; | |
best_sf_neg = sf_neg; | |
best_sf_pos = sf_pos; | |
best_hist = cur_hist; | |
best_residuals = cur_residuals; | |
} | |
} | |
} | |
} | |
// Wrtie frame with the best residuals | |
printf("frame: %6d, cf: %2d, sf_neg: %2d, sf_pos: %2d\n", f, best_cf, best_sf_neg, best_sf_pos); | |
unsigned short frame_header = (best_sf_neg) | (best_sf_pos << 5) | (best_cf << 10); | |
encoded_data[data_p++] = (frame_header >> 0) & 0xff; | |
encoded_data[data_p++] = (frame_header >> 8) & 0xff; | |
for (int s = 0; s < 32; s+=2) { | |
encoded_data[data_p++] = | |
((best_residuals.r[s] & 0xf) << 4) | | |
(best_residuals.r[s+1] & 0xf); | |
} | |
*total_error += best_error; | |
hist = best_hist; | |
} | |
return encoded_data; | |
} | |
short *l555_decode(unsigned char *encoded_data, unsigned int encoded_size, int coeffs[32][3], unsigned int *wav_size) { | |
int frame_count = encoded_size / 18; | |
*wav_size = frame_count * 32 * 2; | |
short *sample_data = malloc(*wav_size); | |
ASSERT(sample_data, "Malloc for %d bytes failed", *wav_size); | |
l555_hist_t hist = {0}; | |
int p = 0; | |
int so = 0; | |
for (int f = 0; f < frame_count; f++) { | |
unsigned short frame_header = encoded_data[p] | (encoded_data[p+1] << 8); | |
p += 2; | |
unsigned int sf_neg = (frame_header >> 0) & 0x1f; | |
unsigned int sf_pos = (frame_header >> 5) & 0x1f; | |
unsigned int cf = (frame_header >> 10) & 0x1f; | |
for (int si = 0; si < 32; si++) { | |
int predicted = | |
hist.a * coeffs[cf][0] + | |
hist.b * coeffs[cf][1] + | |
hist.c * coeffs[cf][2]; | |
int nibble = (si & 1) ? (encoded_data[p++] & 0xf) : ((encoded_data[p] >> 4) & 0xf); | |
int encoded = sign_extend_4(nibble); | |
int sf = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg]; | |
short sample = clamp((encoded * sf - predicted) >> 12, -32768, 32767); | |
sample_data[so++] = sample; | |
hist.c = hist.b; | |
hist.b = hist.a; | |
hist.a = sample; | |
} | |
} | |
return sample_data; | |
} | |
short *wav_read(const char *path, samples_desc_t *desc) { | |
FILE *fh = fopen(path, "rb"); | |
ASSERT(fh, "Can't open %s for reading", path); | |
unsigned int container_type = fread_u32_le(fh); | |
ASSERT(container_type == WAV_CHUNK_ID("RIFF"), "Not a RIFF container"); | |
unsigned int file_size = fread_u32_le(fh); | |
unsigned int wavid = fread_u32_le(fh); | |
ASSERT(wavid == WAV_CHUNK_ID("WAVE"), "No WAVE id found"); | |
unsigned int data_size = 0; | |
unsigned int format_length = 0; | |
unsigned int format_type = 0; | |
unsigned int channels = 0; | |
unsigned int samplerate = 0; | |
unsigned int byte_rate = 0; | |
unsigned int block_align = 0; | |
unsigned int bits_per_sample = 0; | |
int l555_coeffs[32][3] = {0}; | |
/* Find the fmt, pflt and data chunk, skip all others */ | |
while (1) { | |
unsigned int chunk_type = fread_u32_le(fh); | |
unsigned int chunk_size = fread_u32_le(fh); | |
printf("chunk type %c%c%c%c\n", (chunk_type >> 0) & 0xff, (chunk_type >> 8) & 0xff, (chunk_type >> 16) & 0xff, (chunk_type >> 24) & 0xff); | |
if (chunk_type == WAV_CHUNK_ID("fmt ")) { | |
ASSERT(chunk_size == 16 || chunk_size == 18, "WAV fmt chunk size missmatch"); | |
format_type = fread_u16_le(fh); | |
channels = fread_u16_le(fh); | |
samplerate = fread_u32_le(fh); | |
byte_rate = fread_u32_le(fh); | |
block_align = fread_u16_le(fh); | |
bits_per_sample = fread_u16_le(fh); | |
if (chunk_size == 18) { | |
unsigned short extra_params = fread_u16_le(fh); | |
ASSERT(extra_params == 0, "WAV fmt extra params not supported"); | |
} | |
} | |
else if (chunk_type == WAV_CHUNK_ID("pflt ")) { | |
unsigned int filter_order = fread_u32_le(fh); | |
unsigned int filter_count = fread_u32_le(fh); | |
ASSERT(filter_order == 3, "Invalid filter order %d for format l5", filter_order); | |
unsigned int expected_chunk_size = 32 * 3 * 4 + 8; | |
ASSERT(chunk_size == expected_chunk_size, "Invalid chunk size %d for format l5", chunk_size); | |
// Apparently we always have 32*3 coeffs in this chunk, regardless | |
// of the filter_count. | |
for (int i = 0; i < 32; i++) { | |
for (int j = 0; j < 3; j++) { | |
l555_coeffs[i][j] = fread_u32_le(fh); | |
} | |
} | |
} | |
else if (chunk_type == WAV_CHUNK_ID("data")) { | |
data_size = chunk_size; | |
break; | |
} | |
else { | |
int seek_result = fseek(fh, chunk_size, SEEK_CUR); | |
ASSERT(seek_result == 0, "Malformed RIFF header"); | |
} | |
} | |
ASSERT(bits_per_sample == 16, "Bits per samples != 16"); | |
ASSERT(data_size, "No data chunk"); | |
short *sample_data; | |
unsigned int wav_size; | |
if (format_type == 1) { | |
wav_size = data_size; | |
sample_data = malloc(wav_size); | |
ASSERT(sample_data, "Malloc for %d bytes failed", data_size); | |
int read = fread(sample_data, data_size, 1, fh); | |
ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size); | |
} | |
else if (format_type == 0x555) { | |
unsigned char *l555_data = malloc(data_size); | |
ASSERT(l555_data, "Malloc for %d bytes failed", data_size); | |
int read = fread(l555_data, data_size, 1, fh); | |
ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size); | |
sample_data = l555_decode(l555_data, data_size, l555_coeffs, &wav_size); | |
free(l555_data); | |
} | |
else { | |
ABORT("Unknown format type %d", format_type); | |
} | |
fclose(fh); | |
desc->samplerate = samplerate; | |
desc->samples_count = wav_size / (channels * (bits_per_sample/8)); | |
desc->channels = channels; | |
return sample_data; | |
} | |
int wav_write(const char *path, short *sample_data, samples_desc_t *desc) { | |
unsigned int data_size = desc->samples_count * desc->channels * sizeof(short); | |
unsigned int samplerate = desc->samplerate; | |
short bits_per_sample = 16; | |
short channels = desc->channels; | |
FILE *fh = fopen(path, "wb"); | |
ASSERT(fh, "Can't open %s for writing", path); | |
fwrite("RIFF", 1, 4, fh); | |
fwrite_u32_le(data_size + 44 - 8, fh); | |
fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh); | |
fwrite_u16_le(0x0001, fh); // format_type | |
fwrite_u16_le(channels, fh); | |
fwrite_u32_le(samplerate, fh); | |
fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh); | |
fwrite_u16_le(channels * bits_per_sample/8, fh); | |
fwrite_u16_le(bits_per_sample, fh); | |
fwrite("data", 1, 4, fh); | |
fwrite_u32_le(data_size, fh); | |
fwrite((void*)sample_data, data_size, 1, fh); | |
fclose(fh); | |
return data_size + 44 - 8; | |
} | |
int mwv_write(const char *path, short *sample_data, samples_desc_t *desc) { | |
unsigned int samplerate = desc->samplerate; | |
short bits_per_sample = 16; | |
short channels = desc->channels; | |
desc->error = 0; | |
int l555_coeffs[32][3] = {0}; | |
unsigned int data_size = 0; | |
unsigned char *encoded_data = l555_encode(sample_data, desc->samples_count, l555_coeffs, &data_size, &desc->error); | |
FILE *fh = fopen(path, "wb"); | |
ASSERT(fh, "Can't open %s for writing", path); | |
fwrite("RIFF", 1, 4, fh); | |
fwrite_u32_le(data_size + 44 - 8, fh); | |
fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh); | |
fwrite_u16_le(0x0555, fh); // format_type | |
fwrite_u16_le(channels, fh); | |
fwrite_u32_le(samplerate, fh); | |
fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh); | |
fwrite_u16_le(channels * bits_per_sample/8, fh); | |
fwrite_u16_le(bits_per_sample, fh); | |
fwrite("pflt", 1, 4, fh); | |
fwrite_u32_le(32 * 3 * 4 + 8, fh); // chunk size | |
fwrite_u32_le(3, fh); // filter_order | |
fwrite_u32_le(32, fh); // filter_count. fixme, maybe? We have to store all 32 filters anyway | |
fwrite((void*)l555_coeffs, sizeof(l555_coeffs), 1, fh); | |
fwrite("data", 1, 4, fh); | |
fwrite_u32_le(data_size, fh); | |
fwrite((void*)encoded_data, data_size, 1, fh); | |
fclose(fh); | |
return data_size + 44 - 8; | |
} | |
int main(int argc, char **argv) { | |
ASSERT(argc >= 3, "\nUsage: qoaconv in.{wav,mp3,flac,qoa} out.{wav,qoa}") | |
samples_desc_t desc; | |
short *sample_data = NULL; | |
/* Decode input */ | |
if (STR_ENDS_WITH(argv[1], ".wav") || STR_ENDS_WITH(argv[1], ".mwv")) { | |
sample_data = wav_read(argv[1], &desc); | |
} | |
else { | |
ABORT("Unknown file type for %s", argv[1]); | |
} | |
ASSERT(sample_data, "Can't load/decode %s", argv[1]); | |
printf( | |
"%s: channels: %d, samplerate: %d hz, samples per channel: %d, duration: %d sec\n", | |
argv[1], desc.channels, desc.samplerate, desc.samples_count, desc.samples_count/desc.samplerate | |
); | |
/* Encode output */ | |
int bytes_written = 0; | |
double psnr = INFINITY; | |
if (STR_ENDS_WITH(argv[2], ".wav")) { | |
bytes_written = wav_write(argv[2], sample_data, &desc); | |
} | |
else if (STR_ENDS_WITH(argv[2], ".mwv")) { | |
bytes_written = mwv_write(argv[2], sample_data, &desc); | |
psnr = -20.0 * log10(sqrt(desc.error/(desc.samples_count * desc.channels)) / 32768.0); | |
} | |
else { | |
ABORT("Unknown file type for %s", argv[2]); | |
} | |
ASSERT(bytes_written, "Can't write/encode %s", argv[2]); | |
free(sample_data); | |
printf( | |
"%s: size: %d kb (%d bytes) = %.2f kbit/s, psnr: %.2f db\n", | |
argv[2], bytes_written/1024, bytes_written, | |
(((float)bytes_written*8)/((float)desc.samples_count/(float)desc.samplerate))/1024, psnr | |
); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment