phoboslab · April 23, 2024 10:16
diff --git a/l555.c b/l555.c
 /*

 Copyright (c) 2024, Dominic Szablewski - https://phoboslab.org
 SPDX-License-Identifier: MIT

 Compile with:
 	gcc l555.c -lm -O3 -o l555

 PCM WAV to L555 MWV:
 	./l555 in.wav out.mwv

 L555 MWV to PCM WAV
 	./l555 in.mwv out.wav

 */

 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>

 #define STRINGIFY(x) #x
 #define TOSTRING(x) STRINGIFY(x)
 #define ABORT(...) \
 	printf("Abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); \
 	printf("\n"); \
 	exit(1)

 #define ASSERT(TEST, ...) \
 	if (!(TEST)) { \
 		ABORT(__VA_ARGS__); \
 	}

 #define STR_ENDS_WITH(S, E) (strcmp(S + strlen(S) - (sizeof(E)-1), E) == 0)


 typedef struct {
 	unsigned int channels;
 	unsigned int samplerate;
 	unsigned int samples_count;
 	double error;
 } samples_desc_t;

 #define WAV_CHUNK_ID(S) \
 	(((unsigned int)(S[3])) << 24 | ((unsigned int)(S[2])) << 16 | \
 	 ((unsigned int)(S[1])) <<  8 | ((unsigned int)(S[0])))

 #define L555_FILTER_ORDER 3
 #define L555_MAX_FILTERS 32

 void fwrite_u32_le(unsigned int v, FILE *fh) {
 	unsigned char buf[sizeof(unsigned int)];
 	buf[0] = 0xff & (v      );
 	buf[1] = 0xff & (v >>  8);
 	buf[2] = 0xff & (v >> 16);
 	buf[3] = 0xff & (v >> 24);
 	int wrote = fwrite(buf, sizeof(unsigned int), 1, fh);
 	ASSERT(wrote, "Write error");
 }

 void fwrite_u16_le(unsigned short v, FILE *fh) {
 	unsigned char buf[sizeof(unsigned short)];
 	buf[0] = 0xff & (v      );
 	buf[1] = 0xff & (v >>  8);
 	int wrote = fwrite(buf, sizeof(unsigned short), 1, fh);
 	ASSERT(wrote, "Write error");
 }

 unsigned int fread_u32_le(FILE *fh) {
 	unsigned char buf[sizeof(unsigned int)];
 	int read = fread(buf, sizeof(unsigned int), 1, fh);
 	ASSERT(read, "Read error or unexpected end of file");
 	return (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
 }

 unsigned short fread_u16_le(FILE *fh) {
 	unsigned char buf[sizeof(unsigned short)];
 	int read = fread(buf, sizeof(unsigned short), 1, fh);
 	ASSERT(read, "Read error or unexpected end of file");
 	return (buf[1] << 8) | buf[0];
 }

 static inline int clamp(int v, int min, int max) {
 	if (v < min) { return min; }
 	if (v > max) { return max; }
 	return v;
 }

 static inline int sign_extend_4(int x) { 
 	return (x & 7) - (x & 8);
 }

 typedef struct {
 	short a, b, c;
 } l555_hist_t;

 typedef struct {
 	char r[32];
 } l555_residuals_t;

 static const int l555_scalefactors[32] =  {
 	   4096,    5198,    6597,    8372,   10625,   13484,   17113,   21718,
 	  27563,   34980,   44393,   56339,   71500,   90741,  115160,  146149,
 	 185478,  235390,  298734,  379123,  481145,  610622,  774940,  983477,
 	1248130, 1584003, 2010258, 2551218, 3237751, 4109030, 5214770, 6618065
 };

 static const int l555_default_coeffs[32][3] = {
 	// These are the same 4 fixed predictors as used by FLAC, 
 	// represented in .12 fixed point.
 	{     0,     0,     0}, // s_0(t) = 0
 	{ -4096,     0,     0}, // s_1(t) = s(t-1)
 	{ -8192,  4096,     0}, // s_2(t) = 2s(t-1) - s(t-2)
 	{-12288, 12288, -4096}, // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3)

 	// Unused...
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0},
 	{     0,     0,     0}
 };


 unsigned char *l555_encode(short *sample_data, unsigned int samples_len, int coeffs[32][3], unsigned int *encoded_size, double *total_error) {
 	int frame_count = samples_len / 32;

 	*encoded_size = frame_count * 18;
 	unsigned char *encoded_data = malloc(*encoded_size);
 	ASSERT(encoded_data, "Malloc for %d bytes failed", *encoded_size);

 	int data_p = 0;
 	memcpy(coeffs, l555_default_coeffs, sizeof(l555_default_coeffs));

 	l555_hist_t hist = {0};

 	for (int f = 0; f < frame_count; f++) {

 		l555_residuals_t best_residuals = {0};
 		l555_hist_t best_hist = {0};
 		double best_error = INFINITY;
 		int best_cf = 0;
 		int best_sf_neg = 0;
 		int best_sf_pos = 0;

 		// Brute force the first 4 coeffs; doing all 32 makes this whole thing
 		// even slower and only marginally improves quality.
 		for (int cf = 0; cf < 4; cf++) {

 			// Brute forcing the negative and positive scalefactors this way
 			// is kinda pointless, as they are usually very close together.
 			// For better performance:
 			// 1. use the same sf for pos and neg
 			// 2. start with the highest sf and go down to zero; stop as soon as
 			//    as the cur_error is worse than best_error
 			for (int sf_neg = 0; sf_neg < 32; sf_neg++) {
 				for (int sf_pos = 0; sf_pos < 32; sf_pos++) {

 					l555_residuals_t cur_residuals;
 					l555_hist_t cur_hist = hist;
 					double cur_error = 0;

 					for (int si = 0; si < 32; si++) {
 						int predicted = 
 							cur_hist.a * coeffs[cf][0] + 
 							cur_hist.b * coeffs[cf][1] + 
 							cur_hist.c * coeffs[cf][2];

 						int sample = sample_data[f * 32 + si];
 						int sf_enc = l555_scalefactors[sample >= 0 ? sf_pos : sf_neg];

 						// Dividing by float and rounding the result is vital for better quality!
 						// int encoded = clamp(((sample << 12) + predicted) / sf_enc, -8, 7);
 						int encoded = clamp(round(((sample << 12) + predicted) / (float)sf_enc), -8, 7);
 						cur_residuals.r[si] = encoded;

 						// Careful, we might use a different sf for decoding than for
 						// encoding, if the sample was negative, but rounded to zero!
 						int sf_dec = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
 						int decoded = clamp((encoded * sf_dec - predicted) >> 12, -32768, 32767);

 						cur_hist.c = cur_hist.b;
 						cur_hist.b = cur_hist.a;
 						cur_hist.a = decoded;

 						double err = sample - decoded;
 						cur_error += err * err;
 					}

 					if (cur_error < best_error) {
 						best_error = cur_error;
 						best_cf = cf;
 						best_sf_neg = sf_neg;
 						best_sf_pos = sf_pos;
 						best_hist = cur_hist;
 						best_residuals = cur_residuals;
 					}
 				}
 			}
 		}

 		// Wrtie frame with the best residuals
 		printf("frame: %6d, cf: %2d, sf_neg: %2d, sf_pos: %2d\n", f, best_cf, best_sf_neg, best_sf_pos);
 		unsigned short frame_header = (best_sf_neg) | (best_sf_pos << 5) | (best_cf << 10);
 		encoded_data[data_p++] = (frame_header >> 0) & 0xff;
 		encoded_data[data_p++] = (frame_header >> 8) & 0xff;

 		for (int s = 0; s < 32; s+=2) {
 			encoded_data[data_p++] = 
 				((best_residuals.r[s] & 0xf) << 4) |
 				(best_residuals.r[s+1] & 0xf);
 		}

 		*total_error += best_error;
 		hist = best_hist;
 	}

 	return encoded_data;
 }

 short *l555_decode(unsigned char *encoded_data, unsigned int encoded_size, int coeffs[32][3], unsigned int *wav_size) {
 	int frame_count = encoded_size / 18;

 	*wav_size = frame_count * 32 * 2;
 	short *sample_data = malloc(*wav_size);
 	ASSERT(sample_data, "Malloc for %d bytes failed", *wav_size);

 	l555_hist_t hist = {0};

 	int p = 0;
 	int so = 0;
 	for (int f = 0; f < frame_count; f++) {
 		unsigned short frame_header = encoded_data[p] | (encoded_data[p+1] << 8);
 		p += 2;

 		unsigned int sf_neg = (frame_header >> 0) & 0x1f;
 		unsigned int sf_pos = (frame_header >> 5) & 0x1f;
 		unsigned int cf = (frame_header >> 10) & 0x1f;

 		for (int si = 0; si < 32; si++) {
 			int predicted = 
 				hist.a * coeffs[cf][0] + 
 				hist.b * coeffs[cf][1] + 
 				hist.c * coeffs[cf][2];
 			int nibble = (si & 1) ? (encoded_data[p++] & 0xf) : ((encoded_data[p] >> 4) & 0xf);
 			int encoded = sign_extend_4(nibble);
 			int sf = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
 			short sample = clamp((encoded * sf - predicted) >> 12, -32768, 32767);
 			sample_data[so++] = sample;

 			hist.c = hist.b;
 			hist.b = hist.a;
 			hist.a = sample;
 		}
 	}
 	return sample_data;
 }

 short *wav_read(const char *path, samples_desc_t *desc) {
 	FILE *fh = fopen(path, "rb");
 	ASSERT(fh, "Can't open %s for reading", path);

 	unsigned int container_type = fread_u32_le(fh);
 	ASSERT(container_type == WAV_CHUNK_ID("RIFF"), "Not a RIFF container");

 	unsigned int file_size = fread_u32_le(fh);
 	unsigned int wavid = fread_u32_le(fh);
 	ASSERT(wavid == WAV_CHUNK_ID("WAVE"), "No WAVE id found");

 	unsigned int data_size = 0;
 	unsigned int format_length = 0;
 	unsigned int format_type = 0;
 	unsigned int channels = 0;
 	unsigned int samplerate = 0;
 	unsigned int byte_rate = 0;
 	unsigned int block_align = 0;
 	unsigned int bits_per_sample = 0;

 	int l555_coeffs[32][3] = {0};

 	/* Find the fmt, pflt and data chunk, skip all others */
 	while (1) {
 		unsigned int chunk_type = fread_u32_le(fh);
 		unsigned int chunk_size = fread_u32_le(fh);

 		printf("chunk type %c%c%c%c\n", (chunk_type >> 0) & 0xff, (chunk_type >> 8) & 0xff, (chunk_type >> 16) & 0xff, (chunk_type >> 24) & 0xff);

 		if (chunk_type == WAV_CHUNK_ID("fmt ")) {
 			ASSERT(chunk_size == 16 || chunk_size == 18, "WAV fmt chunk size missmatch");

 			format_type = fread_u16_le(fh);
 			channels = fread_u16_le(fh);
 			samplerate = fread_u32_le(fh);
 			byte_rate = fread_u32_le(fh);
 			block_align = fread_u16_le(fh);
 			bits_per_sample = fread_u16_le(fh);

 			if (chunk_size == 18) {
 				unsigned short extra_params = fread_u16_le(fh);
 				ASSERT(extra_params == 0, "WAV fmt extra params not supported");
 			}
 		}
 		else if (chunk_type == WAV_CHUNK_ID("pflt ")) {
 			unsigned int filter_order = fread_u32_le(fh);
 			unsigned int filter_count = fread_u32_le(fh);
 			ASSERT(filter_order == 3, "Invalid filter order %d for format l5", filter_order);

 			unsigned int expected_chunk_size = 32 * 3 * 4 + 8;
 			ASSERT(chunk_size == expected_chunk_size, "Invalid chunk size %d for format l5", chunk_size);

 			// Apparently we always have 32*3 coeffs in this chunk, regardless
 			// of the filter_count.
 			for (int i = 0; i < 32; i++) {
 				for (int j = 0; j < 3; j++) {
 					l555_coeffs[i][j] = fread_u32_le(fh);
 				}
 			}
 		}
 		else if (chunk_type == WAV_CHUNK_ID("data")) {
 			data_size = chunk_size;
 			break;
 		}
 		else {
 			int seek_result = fseek(fh, chunk_size, SEEK_CUR);
 			ASSERT(seek_result == 0, "Malformed RIFF header");
 		}
 	}

 	ASSERT(bits_per_sample == 16, "Bits per samples != 16");
 	ASSERT(data_size, "No data chunk");

 	short *sample_data;
 	unsigned int wav_size;
 	if (format_type == 1) {
 		wav_size = data_size;
 		sample_data = malloc(wav_size);
 		ASSERT(sample_data, "Malloc for %d bytes failed", data_size);
 		int read = fread(sample_data, data_size, 1, fh);
 		ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);
 	}
 	else if (format_type == 0x555) {
 		unsigned char *l555_data = malloc(data_size);
 		ASSERT(l555_data, "Malloc for %d bytes failed", data_size);

 		int read = fread(l555_data, data_size, 1, fh);
 		ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);

 		sample_data = l555_decode(l555_data, data_size, l555_coeffs, &wav_size);
 		free(l555_data);
 	}
 	else {
 		ABORT("Unknown format type %d", format_type);
 	}	
 	fclose(fh);

 	desc->samplerate = samplerate;
 	desc->samples_count = wav_size / (channels * (bits_per_sample/8));
 	desc->channels = channels;

 	return sample_data;
 }

 int wav_write(const char *path, short *sample_data, samples_desc_t *desc) {
 	unsigned int data_size = desc->samples_count * desc->channels * sizeof(short);
 	unsigned int samplerate = desc->samplerate;
 	short bits_per_sample = 16;
 	short channels = desc->channels;

 	FILE *fh = fopen(path, "wb");
 	ASSERT(fh, "Can't open %s for writing", path);
 	fwrite("RIFF", 1, 4, fh);
 	fwrite_u32_le(data_size + 44 - 8, fh);
 	fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
 	fwrite_u16_le(0x0001, fh); // format_type
 	fwrite_u16_le(channels, fh);
 	fwrite_u32_le(samplerate, fh);
 	fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
 	fwrite_u16_le(channels * bits_per_sample/8, fh);
 	fwrite_u16_le(bits_per_sample, fh);
 	fwrite("data", 1, 4, fh);
 	fwrite_u32_le(data_size, fh);
 	fwrite((void*)sample_data, data_size, 1, fh);
 	fclose(fh);
 	return data_size  + 44 - 8;
 }


 int mwv_write(const char *path, short *sample_data, samples_desc_t *desc) {
 	unsigned int samplerate = desc->samplerate;
 	short bits_per_sample = 16;
 	short channels = desc->channels;

 	desc->error = 0;
 	int l555_coeffs[32][3] = {0};
 	unsigned int data_size = 0;
 	unsigned char *encoded_data = l555_encode(sample_data, desc->samples_count, l555_coeffs, &data_size, &desc->error);

 	FILE *fh = fopen(path, "wb");
 	ASSERT(fh, "Can't open %s for writing", path);
 	fwrite("RIFF", 1, 4, fh);
 	fwrite_u32_le(data_size + 44 - 8, fh);
 	fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
 	fwrite_u16_le(0x0555, fh); // format_type
 	fwrite_u16_le(channels, fh);
 	fwrite_u32_le(samplerate, fh);
 	fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
 	fwrite_u16_le(channels * bits_per_sample/8, fh);
 	fwrite_u16_le(bits_per_sample, fh);
 	
 	fwrite("pflt", 1, 4, fh);
 	fwrite_u32_le(32 * 3 * 4 + 8, fh); // chunk size
 	fwrite_u32_le(3, fh); // filter_order
 	fwrite_u32_le(32, fh); // filter_count. fixme, maybe? We have to store all 32 filters anyway
 	fwrite((void*)l555_coeffs, sizeof(l555_coeffs), 1, fh);

 	fwrite("data", 1, 4, fh);
 	fwrite_u32_le(data_size, fh);
 	fwrite((void*)encoded_data, data_size, 1, fh);
 	
 	fclose(fh);
 	return data_size  + 44 - 8;
 }



 int main(int argc, char **argv) {
 	ASSERT(argc >= 3, "\nUsage: qoaconv in.{wav,mp3,flac,qoa} out.{wav,qoa}")

 	samples_desc_t desc;
 	short *sample_data = NULL;


 	/* Decode input */

 	if (STR_ENDS_WITH(argv[1], ".wav") || STR_ENDS_WITH(argv[1], ".mwv")) {
 		sample_data = wav_read(argv[1], &desc);
 	}
 	else {
 		ABORT("Unknown file type for %s", argv[1]);
 	}

 	ASSERT(sample_data, "Can't load/decode %s", argv[1]);

 	printf(
 		"%s: channels: %d, samplerate: %d hz, samples per channel: %d, duration: %d sec\n",
 		argv[1], desc.channels, desc.samplerate, desc.samples_count, desc.samples_count/desc.samplerate
 	);


 	/* Encode output */
 	
 	int bytes_written = 0;
 	double psnr = INFINITY;
 	if (STR_ENDS_WITH(argv[2], ".wav")) {
 		bytes_written = wav_write(argv[2], sample_data, &desc);
 	}
 	else if (STR_ENDS_WITH(argv[2], ".mwv")) {
 		bytes_written = mwv_write(argv[2], sample_data, &desc);
 		psnr = -20.0 * log10(sqrt(desc.error/(desc.samples_count * desc.channels)) / 32768.0);
 	}
 	else {
 		ABORT("Unknown file type for %s", argv[2]);
 	}

 	ASSERT(bytes_written, "Can't write/encode %s", argv[2]);
 	free(sample_data);

 	printf(
 		"%s: size: %d kb (%d bytes) = %.2f kbit/s, psnr: %.2f db\n",
 		argv[2], bytes_written/1024, bytes_written, 
 		(((float)bytes_written*8)/((float)desc.samples_count/(float)desc.samplerate))/1024, psnr
 	);

 	return 0;
 }
	/*

	Copyright (c) 2024, Dominic Szablewski - https://phoboslab.org
	SPDX-License-Identifier: MIT

	Compile with:
	gcc l555.c -lm -O3 -o l555

	PCM WAV to L555 MWV:
	./l555 in.wav out.mwv

	L555 MWV to PCM WAV
	./l555 in.mwv out.wav

	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>
	#include <string.h>

	#define STRINGIFY(x) #x
	#define TOSTRING(x) STRINGIFY(x)
	#define ABORT(...) \
	printf("Abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); \
	printf("\n"); \
	exit(1)

	#define ASSERT(TEST, ...) \
	if (!(TEST)) { \
	ABORT(__VA_ARGS__); \
	}

	#define STR_ENDS_WITH(S, E) (strcmp(S + strlen(S) - (sizeof(E)-1), E) == 0)


	typedef struct {
	unsigned int channels;
	unsigned int samplerate;
	unsigned int samples_count;
	double error;
	} samples_desc_t;

	#define WAV_CHUNK_ID(S) \
	(((unsigned int)(S[3])) << 24 \| ((unsigned int)(S[2])) << 16 \| \
	((unsigned int)(S[1])) << 8 \| ((unsigned int)(S[0])))

	#define L555_FILTER_ORDER 3
	#define L555_MAX_FILTERS 32

	void fwrite_u32_le(unsigned int v, FILE *fh) {
	unsigned char buf[sizeof(unsigned int)];
	buf[0] = 0xff & (v );
	buf[1] = 0xff & (v >> 8);
	buf[2] = 0xff & (v >> 16);
	buf[3] = 0xff & (v >> 24);
	int wrote = fwrite(buf, sizeof(unsigned int), 1, fh);
	ASSERT(wrote, "Write error");
	}

	void fwrite_u16_le(unsigned short v, FILE *fh) {
	unsigned char buf[sizeof(unsigned short)];
	buf[0] = 0xff & (v );
	buf[1] = 0xff & (v >> 8);
	int wrote = fwrite(buf, sizeof(unsigned short), 1, fh);
	ASSERT(wrote, "Write error");
	}

	unsigned int fread_u32_le(FILE *fh) {
	unsigned char buf[sizeof(unsigned int)];
	int read = fread(buf, sizeof(unsigned int), 1, fh);
	ASSERT(read, "Read error or unexpected end of file");
	return (buf[3] << 24) \| (buf[2] << 16) \| (buf[1] << 8) \| buf[0];
	}

	unsigned short fread_u16_le(FILE *fh) {
	unsigned char buf[sizeof(unsigned short)];
	int read = fread(buf, sizeof(unsigned short), 1, fh);
	ASSERT(read, "Read error or unexpected end of file");
	return (buf[1] << 8) \| buf[0];
	}

	static inline int clamp(int v, int min, int max) {
	if (v < min) { return min; }
	if (v > max) { return max; }
	return v;
	}

	static inline int sign_extend_4(int x) {
	return (x & 7) - (x & 8);
	}

	typedef struct {
	short a, b, c;
	} l555_hist_t;

	typedef struct {
	char r[32];
	} l555_residuals_t;

	static const int l555_scalefactors[32] = {
	4096, 5198, 6597, 8372, 10625, 13484, 17113, 21718,
	27563, 34980, 44393, 56339, 71500, 90741, 115160, 146149,
	185478, 235390, 298734, 379123, 481145, 610622, 774940, 983477,
	1248130, 1584003, 2010258, 2551218, 3237751, 4109030, 5214770, 6618065
	};

	static const int l555_default_coeffs[32][3] = {
	// These are the same 4 fixed predictors as used by FLAC,
	// represented in .12 fixed point.
	{ 0, 0, 0}, // s_0(t) = 0
	{ -4096, 0, 0}, // s_1(t) = s(t-1)
	{ -8192, 4096, 0}, // s_2(t) = 2s(t-1) - s(t-2)
	{-12288, 12288, -4096}, // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3)

	// Unused...
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0},
	{ 0, 0, 0}
	};


	unsigned char l555_encode(short sample_data, unsigned int samples_len, int coeffs[32][3], unsigned int encoded_size, double total_error) {
	int frame_count = samples_len / 32;

	encoded_size = frame_count 18;
	unsigned char encoded_data = malloc(encoded_size);
	ASSERT(encoded_data, "Malloc for %d bytes failed", *encoded_size);

	int data_p = 0;
	memcpy(coeffs, l555_default_coeffs, sizeof(l555_default_coeffs));

	l555_hist_t hist = {0};

	for (int f = 0; f < frame_count; f++) {

	l555_residuals_t best_residuals = {0};
	l555_hist_t best_hist = {0};
	double best_error = INFINITY;
	int best_cf = 0;
	int best_sf_neg = 0;
	int best_sf_pos = 0;

	// Brute force the first 4 coeffs; doing all 32 makes this whole thing
	// even slower and only marginally improves quality.
	for (int cf = 0; cf < 4; cf++) {

	// Brute forcing the negative and positive scalefactors this way
	// is kinda pointless, as they are usually very close together.
	// For better performance:
	// 1. use the same sf for pos and neg
	// 2. start with the highest sf and go down to zero; stop as soon as
	// as the cur_error is worse than best_error
	for (int sf_neg = 0; sf_neg < 32; sf_neg++) {
	for (int sf_pos = 0; sf_pos < 32; sf_pos++) {

	l555_residuals_t cur_residuals;
	l555_hist_t cur_hist = hist;
	double cur_error = 0;

	for (int si = 0; si < 32; si++) {
	int predicted =
	cur_hist.a * coeffs[cf][0] +
	cur_hist.b * coeffs[cf][1] +
	cur_hist.c * coeffs[cf][2];

	int sample = sample_data[f * 32 + si];
	int sf_enc = l555_scalefactors[sample >= 0 ? sf_pos : sf_neg];

	// Dividing by float and rounding the result is vital for better quality!
	// int encoded = clamp(((sample << 12) + predicted) / sf_enc, -8, 7);
	int encoded = clamp(round(((sample << 12) + predicted) / (float)sf_enc), -8, 7);
	cur_residuals.r[si] = encoded;

	// Careful, we might use a different sf for decoding than for
	// encoding, if the sample was negative, but rounded to zero!
	int sf_dec = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
	int decoded = clamp((encoded * sf_dec - predicted) >> 12, -32768, 32767);

	cur_hist.c = cur_hist.b;
	cur_hist.b = cur_hist.a;
	cur_hist.a = decoded;

	double err = sample - decoded;
	cur_error += err * err;
	}

	if (cur_error < best_error) {
	best_error = cur_error;
	best_cf = cf;
	best_sf_neg = sf_neg;
	best_sf_pos = sf_pos;
	best_hist = cur_hist;
	best_residuals = cur_residuals;
	}
	}
	}
	}

	// Wrtie frame with the best residuals
	printf("frame: %6d, cf: %2d, sf_neg: %2d, sf_pos: %2d\n", f, best_cf, best_sf_neg, best_sf_pos);
	unsigned short frame_header = (best_sf_neg) \| (best_sf_pos << 5) \| (best_cf << 10);
	encoded_data[data_p++] = (frame_header >> 0) & 0xff;
	encoded_data[data_p++] = (frame_header >> 8) & 0xff;

	for (int s = 0; s < 32; s+=2) {
	encoded_data[data_p++] =
	((best_residuals.r[s] & 0xf) << 4) \|
	(best_residuals.r[s+1] & 0xf);
	}

	*total_error += best_error;
	hist = best_hist;
	}

	return encoded_data;
	}

	short l555_decode(unsigned char encoded_data, unsigned int encoded_size, int coeffs[32][3], unsigned int *wav_size) {
	int frame_count = encoded_size / 18;

	wav_size = frame_count 32 * 2;
	short sample_data = malloc(wav_size);
	ASSERT(sample_data, "Malloc for %d bytes failed", *wav_size);

	l555_hist_t hist = {0};

	int p = 0;
	int so = 0;
	for (int f = 0; f < frame_count; f++) {
	unsigned short frame_header = encoded_data[p] \| (encoded_data[p+1] << 8);
	p += 2;

	unsigned int sf_neg = (frame_header >> 0) & 0x1f;
	unsigned int sf_pos = (frame_header >> 5) & 0x1f;
	unsigned int cf = (frame_header >> 10) & 0x1f;

	for (int si = 0; si < 32; si++) {
	int predicted =
	hist.a * coeffs[cf][0] +
	hist.b * coeffs[cf][1] +
	hist.c * coeffs[cf][2];
	int nibble = (si & 1) ? (encoded_data[p++] & 0xf) : ((encoded_data[p] >> 4) & 0xf);
	int encoded = sign_extend_4(nibble);
	int sf = l555_scalefactors[encoded >= 0 ? sf_pos : sf_neg];
	short sample = clamp((encoded * sf - predicted) >> 12, -32768, 32767);
	sample_data[so++] = sample;

	hist.c = hist.b;
	hist.b = hist.a;
	hist.a = sample;
	}
	}
	return sample_data;
	}

	short wav_read(const char path, samples_desc_t *desc) {
	FILE *fh = fopen(path, "rb");
	ASSERT(fh, "Can't open %s for reading", path);

	unsigned int container_type = fread_u32_le(fh);
	ASSERT(container_type == WAV_CHUNK_ID("RIFF"), "Not a RIFF container");

	unsigned int file_size = fread_u32_le(fh);
	unsigned int wavid = fread_u32_le(fh);
	ASSERT(wavid == WAV_CHUNK_ID("WAVE"), "No WAVE id found");

	unsigned int data_size = 0;
	unsigned int format_length = 0;
	unsigned int format_type = 0;
	unsigned int channels = 0;
	unsigned int samplerate = 0;
	unsigned int byte_rate = 0;
	unsigned int block_align = 0;
	unsigned int bits_per_sample = 0;

	int l555_coeffs[32][3] = {0};

	/* Find the fmt, pflt and data chunk, skip all others */
	while (1) {
	unsigned int chunk_type = fread_u32_le(fh);
	unsigned int chunk_size = fread_u32_le(fh);

	printf("chunk type %c%c%c%c\n", (chunk_type >> 0) & 0xff, (chunk_type >> 8) & 0xff, (chunk_type >> 16) & 0xff, (chunk_type >> 24) & 0xff);

	if (chunk_type == WAV_CHUNK_ID("fmt ")) {
	ASSERT(chunk_size == 16 \|\| chunk_size == 18, "WAV fmt chunk size missmatch");

	format_type = fread_u16_le(fh);
	channels = fread_u16_le(fh);
	samplerate = fread_u32_le(fh);
	byte_rate = fread_u32_le(fh);
	block_align = fread_u16_le(fh);
	bits_per_sample = fread_u16_le(fh);

	if (chunk_size == 18) {
	unsigned short extra_params = fread_u16_le(fh);
	ASSERT(extra_params == 0, "WAV fmt extra params not supported");
	}
	}
	else if (chunk_type == WAV_CHUNK_ID("pflt ")) {
	unsigned int filter_order = fread_u32_le(fh);
	unsigned int filter_count = fread_u32_le(fh);
	ASSERT(filter_order == 3, "Invalid filter order %d for format l5", filter_order);

	unsigned int expected_chunk_size = 32 * 3 * 4 + 8;
	ASSERT(chunk_size == expected_chunk_size, "Invalid chunk size %d for format l5", chunk_size);

	// Apparently we always have 32*3 coeffs in this chunk, regardless
	// of the filter_count.
	for (int i = 0; i < 32; i++) {
	for (int j = 0; j < 3; j++) {
	l555_coeffs[i][j] = fread_u32_le(fh);
	}
	}
	}
	else if (chunk_type == WAV_CHUNK_ID("data")) {
	data_size = chunk_size;
	break;
	}
	else {
	int seek_result = fseek(fh, chunk_size, SEEK_CUR);
	ASSERT(seek_result == 0, "Malformed RIFF header");
	}
	}

	ASSERT(bits_per_sample == 16, "Bits per samples != 16");
	ASSERT(data_size, "No data chunk");

	short *sample_data;
	unsigned int wav_size;
	if (format_type == 1) {
	wav_size = data_size;
	sample_data = malloc(wav_size);
	ASSERT(sample_data, "Malloc for %d bytes failed", data_size);
	int read = fread(sample_data, data_size, 1, fh);
	ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);
	}
	else if (format_type == 0x555) {
	unsigned char *l555_data = malloc(data_size);
	ASSERT(l555_data, "Malloc for %d bytes failed", data_size);

	int read = fread(l555_data, data_size, 1, fh);
	ASSERT(read, "Read error or unexpected end of file for %d bytes", data_size);

	sample_data = l555_decode(l555_data, data_size, l555_coeffs, &wav_size);
	free(l555_data);
	}
	else {
	ABORT("Unknown format type %d", format_type);
	}
	fclose(fh);

	desc->samplerate = samplerate;
	desc->samples_count = wav_size / (channels * (bits_per_sample/8));
	desc->channels = channels;

	return sample_data;
	}

	int wav_write(const char path, short sample_data, samples_desc_t *desc) {
	unsigned int data_size = desc->samples_count * desc->channels * sizeof(short);
	unsigned int samplerate = desc->samplerate;
	short bits_per_sample = 16;
	short channels = desc->channels;

	FILE *fh = fopen(path, "wb");
	ASSERT(fh, "Can't open %s for writing", path);
	fwrite("RIFF", 1, 4, fh);
	fwrite_u32_le(data_size + 44 - 8, fh);
	fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
	fwrite_u16_le(0x0001, fh); // format_type
	fwrite_u16_le(channels, fh);
	fwrite_u32_le(samplerate, fh);
	fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
	fwrite_u16_le(channels * bits_per_sample/8, fh);
	fwrite_u16_le(bits_per_sample, fh);
	fwrite("data", 1, 4, fh);
	fwrite_u32_le(data_size, fh);
	fwrite((void*)sample_data, data_size, 1, fh);
	fclose(fh);
	return data_size + 44 - 8;
	}


	int mwv_write(const char path, short sample_data, samples_desc_t *desc) {
	unsigned int samplerate = desc->samplerate;
	short bits_per_sample = 16;
	short channels = desc->channels;

	desc->error = 0;
	int l555_coeffs[32][3] = {0};
	unsigned int data_size = 0;
	unsigned char *encoded_data = l555_encode(sample_data, desc->samples_count, l555_coeffs, &data_size, &desc->error);

	FILE *fh = fopen(path, "wb");
	ASSERT(fh, "Can't open %s for writing", path);
	fwrite("RIFF", 1, 4, fh);
	fwrite_u32_le(data_size + 44 - 8, fh);
	fwrite("WAVEfmt \x10\x00\x00\x00", 1, 12, fh);
	fwrite_u16_le(0x0555, fh); // format_type
	fwrite_u16_le(channels, fh);
	fwrite_u32_le(samplerate, fh);
	fwrite_u32_le(channels * samplerate * bits_per_sample/8, fh);
	fwrite_u16_le(channels * bits_per_sample/8, fh);
	fwrite_u16_le(bits_per_sample, fh);

	fwrite("pflt", 1, 4, fh);
	fwrite_u32_le(32 * 3 * 4 + 8, fh); // chunk size
	fwrite_u32_le(3, fh); // filter_order
	fwrite_u32_le(32, fh); // filter_count. fixme, maybe? We have to store all 32 filters anyway
	fwrite((void*)l555_coeffs, sizeof(l555_coeffs), 1, fh);

	fwrite("data", 1, 4, fh);
	fwrite_u32_le(data_size, fh);
	fwrite((void*)encoded_data, data_size, 1, fh);

	fclose(fh);
	return data_size + 44 - 8;
	}



	int main(int argc, char **argv) {
	ASSERT(argc >= 3, "\nUsage: qoaconv in.{wav,mp3,flac,qoa} out.{wav,qoa}")

	samples_desc_t desc;
	short *sample_data = NULL;


	/* Decode input */

	if (STR_ENDS_WITH(argv[1], ".wav") \|\| STR_ENDS_WITH(argv[1], ".mwv")) {
	sample_data = wav_read(argv[1], &desc);
	}
	else {
	ABORT("Unknown file type for %s", argv[1]);
	}

	ASSERT(sample_data, "Can't load/decode %s", argv[1]);

	printf(
	"%s: channels: %d, samplerate: %d hz, samples per channel: %d, duration: %d sec\n",
	argv[1], desc.channels, desc.samplerate, desc.samples_count, desc.samples_count/desc.samplerate
	);


	/* Encode output */

	int bytes_written = 0;
	double psnr = INFINITY;
	if (STR_ENDS_WITH(argv[2], ".wav")) {
	bytes_written = wav_write(argv[2], sample_data, &desc);
	}
	else if (STR_ENDS_WITH(argv[2], ".mwv")) {
	bytes_written = mwv_write(argv[2], sample_data, &desc);
	psnr = -20.0 * log10(sqrt(desc.error/(desc.samples_count * desc.channels)) / 32768.0);
	}
	else {
	ABORT("Unknown file type for %s", argv[2]);
	}

	ASSERT(bytes_written, "Can't write/encode %s", argv[2]);
	free(sample_data);

	printf(
	"%s: size: %d kb (%d bytes) = %.2f kbit/s, psnr: %.2f db\n",
	argv[2], bytes_written/1024, bytes_written,
	(((float)bytes_written*8)/((float)desc.samples_count/(float)desc.samplerate))/1024, psnr
	);

	return 0;
	}