Created
March 5, 2022 03:59
-
-
Save kode54/0912aa2c4fe339913dd5397cea06eab5 to your computer and use it in GitHub Desktop.
Bit o' R8Brain benchmarking of DSD downsampling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include "r8bstate.h" | |
// In my quick benchmarks on Apple Silicon, this decimator makes the following code about 40% faster | |
#define DSD_DECIMATOR 1 | |
#ifdef DSD_DECIMATOR | |
/** | |
* DSD 2 PCM: Stage 1: | |
* Decimate by factor 8 | |
* (one byte (8 samples) -> one float sample) | |
* The bits are processed from least signicifant to most signicicant. | |
* @author Sebastian Gesemann | |
*/ | |
#define dsd2pcm_FILTER_COEFFS_COUNT 64 | |
static const float dsd2pcm_FILTER_COEFFS[64] = { | |
0.09712411121659f, 0.09613438994044f, 0.09417884216316f, 0.09130441727307f, | |
0.08757947648990f, 0.08309142055179f, 0.07794369263673f, 0.07225228745463f, | |
0.06614191680338f, 0.05974199351302f, 0.05318259916599f, 0.04659059631228f, | |
0.04008603356890f, 0.03377897290478f, 0.02776684382775f, 0.02213240062966f, | |
0.01694232798846f, 0.01224650881275f, 0.00807793792573f, 0.00445323755944f, | |
0.00137370697215f, -0.00117318019994f, -0.00321193033831f, -0.00477694265140f, | |
-0.00591028841335f, -0.00665946056286f, -0.00707518873201f, -0.00720940203988f, | |
-0.00711340642819f, -0.00683632603227f, -0.00642384017266f, -0.00591723006715f, | |
-0.00535273320457f, -0.00476118922548f, -0.00416794965654f, -0.00359301524813f, | |
-0.00305135909510f, -0.00255339111833f, -0.00210551956895f, -0.00171076760278f, | |
-0.00136940723130f, -0.00107957856005f, -0.00083786862365f, -0.00063983084245f, | |
-0.00048043272086f, -0.00035442550015f, -0.00025663481039f, -0.00018217573430f, | |
-0.00012659899635f, -0.00008597726991f, -0.00005694188820f, -0.00003668060332f, | |
-0.00002290670286f, -0.00001380895679f, -0.00000799057558f, -0.00000440385083f, | |
-0.00000228567089f, -0.00000109760778f, -0.00000047286430f, -0.00000017129652f, | |
-0.00000004282776f, 0.00000000119422f, 0.00000000949179f, 0.00000000747450f | |
}; | |
struct dsd2pcm_state { | |
/* | |
* This is the 2nd half of an even order symmetric FIR | |
* lowpass filter (to be used on a signal sampled at 44100*64 Hz) | |
* Passband is 0-24 kHz (ripples +/- 0.025 dB) | |
* Stopband starts at 176.4 kHz (rejection: 170 dB) | |
* The overall gain is 2.0 | |
*/ | |
/* These remain constant for the duration */ | |
int FILT_LOOKUP_PARTS; | |
float *FILT_LOOKUP_TABLE; | |
uint8_t *REVERSE_BITS; | |
int FIFO_LENGTH; | |
int FIFO_OFS_MASK; | |
/* These are altered */ | |
int *fifo; | |
int fpos; | |
}; | |
static void dsd2pcm_free(void *); | |
static void dsd2pcm_reset(void *); | |
static void *dsd2pcm_alloc() { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)calloc(1, sizeof(struct dsd2pcm_state)); | |
float *FILT_LOOKUP_TABLE; | |
double *temp; | |
uint8_t *REVERSE_BITS; | |
if(!state) | |
return NULL; | |
state->FILT_LOOKUP_PARTS = (dsd2pcm_FILTER_COEFFS_COUNT + 7) / 8; | |
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS; | |
// The current 128 tap FIR leads to an 8 KB lookup table | |
state->FILT_LOOKUP_TABLE = (float *)calloc(sizeof(float), FILT_LOOKUP_PARTS << 8); | |
if(!state->FILT_LOOKUP_TABLE) | |
goto fail; | |
FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE; | |
temp = (double *)calloc(sizeof(double), 0x100); | |
if(!temp) | |
goto fail; | |
for(int part = 0, sofs = 0, dofs = 0; part < FILT_LOOKUP_PARTS;) { | |
memset(temp, 0, 0x100 * sizeof(double)); | |
for(int bit = 0, bitmask = 0x80; bit < 8 && sofs + bit < dsd2pcm_FILTER_COEFFS_COUNT;) { | |
double coeff = dsd2pcm_FILTER_COEFFS[sofs + bit]; | |
for(int bite = 0; bite < 0x100; bite++) { | |
if((bite & bitmask) == 0) { | |
temp[bite] -= coeff; | |
} else { | |
temp[bite] += coeff; | |
} | |
} | |
bit++; | |
bitmask >>= 1; | |
} | |
for(int s = 0; s < 0x100;) { | |
FILT_LOOKUP_TABLE[dofs++] = (float)temp[s++]; | |
} | |
part++; | |
sofs += 8; | |
} | |
free(temp); | |
{ // calculate FIFO stuff | |
int k = 1; | |
while(k < FILT_LOOKUP_PARTS * 2) k <<= 1; | |
state->FIFO_LENGTH = k; | |
state->FIFO_OFS_MASK = k - 1; | |
} | |
state->REVERSE_BITS = (uint8_t *)calloc(1, 0x100); | |
if(!state->REVERSE_BITS) | |
goto fail; | |
REVERSE_BITS = state->REVERSE_BITS; | |
for(int i = 0, j = 0; i < 0x100; i++) { | |
REVERSE_BITS[i] = (uint8_t)j; | |
// "reverse-increment" of j | |
for(int bitmask = 0x80;;) { | |
if(((j ^= bitmask) & bitmask) != 0) break; | |
if(bitmask == 1) break; | |
bitmask >>= 1; | |
} | |
} | |
state->fifo = (int *)calloc(sizeof(int), state->FIFO_LENGTH); | |
if(!state->fifo) | |
goto fail; | |
dsd2pcm_reset(state); | |
return (void *)state; | |
fail: | |
dsd2pcm_free(state); | |
return NULL; | |
} | |
static void *dsd2pcm_dup(void *_state) { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state; | |
if(state) { | |
struct dsd2pcm_state *newstate = (struct dsd2pcm_state *)calloc(1, sizeof(struct dsd2pcm_state)); | |
if(newstate) { | |
newstate->FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS; | |
newstate->FIFO_LENGTH = state->FIFO_LENGTH; | |
newstate->FIFO_OFS_MASK = state->FIFO_OFS_MASK; | |
newstate->fpos = state->fpos; | |
newstate->FILT_LOOKUP_TABLE = (float *)calloc(sizeof(float), state->FILT_LOOKUP_PARTS << 8); | |
if(!newstate->FILT_LOOKUP_TABLE) | |
goto fail; | |
memcpy(newstate->FILT_LOOKUP_TABLE, state->FILT_LOOKUP_TABLE, sizeof(float) * (state->FILT_LOOKUP_PARTS << 8)); | |
newstate->REVERSE_BITS = (uint8_t *)calloc(1, 0x100); | |
if(!newstate->REVERSE_BITS) | |
goto fail; | |
memcpy(newstate->REVERSE_BITS, state->REVERSE_BITS, 0x100); | |
newstate->fifo = (int *)calloc(sizeof(int), state->FIFO_LENGTH); | |
if(!newstate->fifo) | |
goto fail; | |
memcpy(newstate->fifo, state->fifo, sizeof(int) * state->FIFO_LENGTH); | |
return (void *)newstate; | |
} | |
fail: | |
dsd2pcm_free(newstate); | |
return NULL; | |
} | |
return NULL; | |
} | |
static void dsd2pcm_free(void *_state) { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state; | |
if(state) { | |
free(state->fifo); | |
free(state->REVERSE_BITS); | |
free(state->FILT_LOOKUP_TABLE); | |
free(state); | |
} | |
} | |
static void dsd2pcm_reset(void *_state) { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state; | |
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS; | |
int *fifo = state->fifo; | |
for(int i = 0; i < FILT_LOOKUP_PARTS; i++) { | |
fifo[i] = 0x55; | |
fifo[i + FILT_LOOKUP_PARTS] = 0xAA; | |
} | |
state->fpos = FILT_LOOKUP_PARTS; | |
} | |
static int dsd2pcm_latency(void *_state) { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state; | |
if(state) | |
return state->FIFO_LENGTH; | |
else | |
return 0; | |
} | |
static void dsd2pcm_process(void *_state, const uint8_t *src, size_t sofs, size_t sinc, float *dest, size_t dofs, size_t dinc, size_t len) { | |
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state; | |
int bite1, bite2, temp; | |
float sample; | |
int *fifo = state->fifo; | |
const uint8_t *REVERSE_BITS = state->REVERSE_BITS; | |
const float *FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE; | |
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS; | |
const int FIFO_OFS_MASK = state->FIFO_OFS_MASK; | |
int fpos = state->fpos; | |
while(len > 0) { | |
fifo[fpos] = REVERSE_BITS[fifo[fpos]] & 0xFF; | |
fifo[(fpos + FILT_LOOKUP_PARTS) & FIFO_OFS_MASK] = src[sofs] & 0xFF; | |
sofs += sinc; | |
temp = (fpos + 1) & FIFO_OFS_MASK; | |
sample = 0; | |
for(int k = 0, lofs = 0; k < FILT_LOOKUP_PARTS;) { | |
bite1 = fifo[(fpos - k) & FIFO_OFS_MASK]; | |
bite2 = fifo[(temp + k) & FIFO_OFS_MASK]; | |
sample += FILT_LOOKUP_TABLE[lofs + bite1] + FILT_LOOKUP_TABLE[lofs + bite2]; | |
k++; | |
lofs += 0x100; | |
} | |
fpos = temp; | |
dest[dofs] = sample; | |
dofs += dinc; | |
len--; | |
} | |
state->fpos = fpos; | |
} | |
static void convert_dsd_to_f32(float *output, const uint8_t *input, size_t count, size_t channels, void **dsd2pcm) { | |
for(size_t channel = 0; channel < channels; ++channel) { | |
dsd2pcm_process(dsd2pcm[channel], input, channel, channels, output, channel, channels, count); | |
} | |
} | |
#endif | |
int main(void) { | |
unsigned long long sampleCount = 1024 * 1024 * 32; | |
uint8_t *dsd_random = new uint8_t[sampleCount]; | |
for(size_t i = 0; i < sampleCount; ++i) { | |
dsd_random[i] = rand(); | |
} | |
double srcRate = 44100.0 * 64.0; | |
double dstRate = 48000.0; | |
float *inputData; | |
#ifdef DSD_DECIMATOR | |
srcRate /= 8.0; | |
void *dsd2pcm = dsd2pcm_alloc(); | |
uint32_t dsd2pcmLatency = dsd2pcm_latency(dsd2pcm); | |
uint8_t *dsd_silence = new uint8_t[dsd2pcmLatency]; | |
float *dsd2pcm_temp = new float[sampleCount + dsd2pcmLatency]; | |
memset(dsd_silence, 0x55, dsd2pcmLatency); | |
convert_dsd_to_f32(dsd2pcm_temp, dsd_random, sampleCount, 1, &dsd2pcm); | |
convert_dsd_to_f32(dsd2pcm_temp + sampleCount, dsd_silence, dsd2pcmLatency, 1, &dsd2pcm); | |
inputData = dsd2pcm_temp + dsd2pcmLatency; | |
#else | |
float *dsd2pcm_temp = new float[sampleCount * 8]; | |
float *ptr = dsd2pcm_temp; | |
for(size_t i = 0; i < sampleCount; ++i) { | |
uint8_t sample = dsd_random[i]; | |
for(size_t j = 128; j > 0; j >>= 1) { | |
if (sample & j) *ptr = +1.0; | |
else *ptr = -1.0; | |
++ptr; | |
} | |
} | |
inputData = dsd2pcm_temp; | |
#endif | |
#ifndef DSD_DECIMATOR | |
sampleCount *= 8; | |
#endif | |
float *outputBuffer = new float[1024]; | |
unsigned long long ol = sampleCount * dstRate / srcRate; | |
unsigned long long ip = 0; | |
r8bstate *state = new r8bstate(1, 1024, srcRate, dstRate); | |
while(ol > 0) { | |
size_t blockCount = ol > 1024 ? 1024 : ol; | |
size_t inputAvail = sampleCount - ip; | |
size_t outputGenerated = 0; | |
size_t inputUsed = 0; | |
if(inputAvail > 1024) inputAvail = 1024; | |
if(inputAvail) { | |
outputGenerated = state->resample(&inputData[ip], inputAvail, &inputUsed, outputBuffer, blockCount); | |
ip += inputUsed; | |
} else { | |
outputGenerated = state->flush(outputBuffer, blockCount); | |
} | |
if(outputGenerated > ol) { | |
ol = 0; | |
} else { | |
ol -= outputGenerated; | |
} | |
} | |
delete state; | |
delete[] outputBuffer; | |
#ifdef DSD_DECIMATOR | |
dsd2pcm_free(dsd2pcm); | |
#endif | |
delete[] dsd2pcm_temp; | |
delete[] dsd_random; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment