Last active
November 9, 2020 22:20
-
-
Save iscgar/0d559c17d65b78de624eb124beabf0bf to your computer and use it in GitHub Desktop.
Hand-rolled, compiler copy FIzzBuzz (thanks, Gal!)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string.h> | |
#include <unistd.h> | |
#define CACHELINE 64 | |
#define ALIGNED_BUF 65536 | |
#define FIZZ "Fizz" | |
#define BUZZ "Buzz" | |
#define DELIM " " | |
typedef struct { | |
unsigned char offset; | |
char data[CACHELINE - sizeof(unsigned char)]; | |
} counters_t; | |
#define CPYEL(i) do { \ | |
const char *end = counters[i].data + counters[i].offset; \ | |
const unsigned int n = sizeof(counters[i].data) - counters[i].offset - 1; \ | |
memcpy(off, end, n); \ | |
off += n; \ | |
} while (0) | |
enum | |
{ | |
CARRY = 0, | |
VALUE = 1, | |
OFFSET = 2, | |
}; | |
int main(void) | |
{ | |
__attribute__((aligned(CACHELINE))) static counters_t counters[] = { | |
{ sizeof(counters[0].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000001" DELIM }, | |
{ sizeof(counters[1].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000002" DELIM }, | |
{ sizeof(counters[2].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000004" DELIM }, | |
{ sizeof(counters[3].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000007" DELIM }, | |
{ sizeof(counters[4].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000008" DELIM }, | |
{ sizeof(counters[5].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000011" DELIM }, | |
{ sizeof(counters[6].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000013" DELIM }, | |
{ sizeof(counters[7].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000014" DELIM }, | |
}; | |
__attribute__((aligned(4096))) static char buf[ALIGNED_BUF + (sizeof(counters[0].data) * 15)] = { 0 }; | |
char *off = buf; | |
for (;;) | |
{ | |
while (off - buf < ALIGNED_BUF) | |
{ | |
if (__builtin_expect(counters[0].data[0] != '0', 0)) | |
{ | |
goto end; | |
} | |
CPYEL(0); | |
CPYEL(1); | |
memcpy(off, FIZZ DELIM, sizeof(FIZZ DELIM) - 1); | |
off += sizeof(FIZZ DELIM) - 1; | |
CPYEL(2); | |
memcpy(off, BUZZ DELIM FIZZ DELIM, sizeof(BUZZ DELIM FIZZ DELIM) - 1); | |
off += sizeof(BUZZ DELIM FIZZ DELIM) - 1; | |
CPYEL(3); | |
CPYEL(4); | |
memcpy(off, FIZZ DELIM BUZZ DELIM, sizeof(FIZZ DELIM BUZZ DELIM) - 1); | |
off += sizeof(FIZZ DELIM BUZZ DELIM) - 1; | |
CPYEL(5); | |
memcpy(off, FIZZ DELIM, sizeof(FIZZ DELIM) - 1); | |
off += sizeof(FIZZ DELIM) - 1; | |
CPYEL(6); | |
CPYEL(7); | |
memcpy(off, FIZZ BUZZ DELIM, sizeof(FIZZ BUZZ DELIM) - 1); | |
off += sizeof(FIZZ BUZZ DELIM) - 1; | |
// Increment the counters | |
#define ADVANCE(i) \ | |
if (counters[i].data[digit_offset] > '9') { \ | |
counters[i].data[digit_offset] -= 10; \ | |
parallel_inc[OFFSET][i] = digit_offset; \ | |
++parallel_inc[CARRY][i]; \ | |
b = 1; \ | |
} | |
#define FIXOFF(i) \ | |
if (parallel_inc[OFFSET][i] <= counters[i].offset) { \ | |
counters[i].offset = parallel_inc[OFFSET][i] - 1; \ | |
} \ | |
// carry, value, offset in SoA layout | |
char parallel_inc[3][8]; | |
memset(parallel_inc[CARRY], 1, sizeof(parallel_inc[CARRY])); | |
memset(parallel_inc[VALUE], 5, sizeof(parallel_inc[VALUE])); | |
memset(parallel_inc[OFFSET], sizeof(counters[0].data) - sizeof(DELIM) - 1, sizeof(parallel_inc[OFFSET])); | |
for (unsigned digit_offset = sizeof(counters[0].data) - sizeof(DELIM) - 1;; --digit_offset) | |
{ | |
int b = 0; | |
counters[0].data[digit_offset] += parallel_inc[VALUE][0]; | |
counters[1].data[digit_offset] += parallel_inc[VALUE][1]; | |
counters[2].data[digit_offset] += parallel_inc[VALUE][2]; | |
counters[3].data[digit_offset] += parallel_inc[VALUE][3]; | |
counters[4].data[digit_offset] += parallel_inc[VALUE][4]; | |
counters[5].data[digit_offset] += parallel_inc[VALUE][5]; | |
counters[6].data[digit_offset] += parallel_inc[VALUE][6]; | |
counters[7].data[digit_offset] += parallel_inc[VALUE][7]; | |
ADVANCE(0); | |
ADVANCE(1); | |
ADVANCE(2); | |
ADVANCE(3); | |
ADVANCE(4); | |
ADVANCE(5); | |
ADVANCE(6); | |
ADVANCE(7); | |
memcpy(parallel_inc[VALUE], parallel_inc[CARRY], sizeof(parallel_inc[CARRY])); | |
memset(parallel_inc[CARRY], 0, sizeof(parallel_inc[CARRY])); | |
if (!b) | |
{ | |
FIXOFF(0); | |
FIXOFF(1); | |
FIXOFF(2); | |
FIXOFF(3); | |
FIXOFF(4); | |
FIXOFF(5); | |
FIXOFF(6); | |
FIXOFF(7); | |
break; | |
} | |
} | |
} | |
write(1, buf, ALIGNED_BUF); | |
memcpy(buf, buf + ALIGNED_BUF, (off - buf) % ALIGNED_BUF); | |
off -= ALIGNED_BUF; | |
} | |
end: | |
*off++ = '\n'; | |
write(1, buf, off - buf); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment