Last active
December 18, 2015 05:29
-
-
Save sh1boot/5732738 to your computer and use it in GitHub Desktop.
experimental table-based divide operation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#define TABBITS 8 | |
#if 0 | |
extern uint32_t divfn(uint16_t i, uint16_t j); | |
#else | |
#if TABBITS != 8 | |
uint32_t table2[(1 << TABBITS) + 1]; | |
#else/*{{{*/ | |
static const uint32_t table2[256 + 1] __attribute__((progmem)) = { | |
0xffffffff, 0x00000000, 0x80000000, 0x55555555, 0x40000000, 0x33333333, 0x2aaaaaab, 0x24924925, 0x20000000, 0x1c71c71c, 0x1999999a, 0x1745d174, 0x15555555, 0x13b13b14, 0x12492492, 0x11111111, | |
0x10000000, 0x0f0f0f0f, 0x0e38e38e, 0x0d79435e, 0x0ccccccd, 0x0c30c30c, 0x0ba2e8ba, 0x0b21642d, 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f, 0x09249249, 0x08d3dcb1, 0x08888889, 0x08421084, | |
0x08000000, 0x07c1f07c, 0x07878788, 0x07507507, 0x071c71c7, 0x06eb3e45, 0x06bca1af, 0x06906907, 0x06666666, 0x063e7064, 0x06186186, 0x05f417d0, 0x05d1745d, 0x05b05b06, 0x0590b216, 0x0572620b, | |
0x05555555, 0x0539782a, 0x051eb852, 0x05050505, 0x04ec4ec5, 0x04d4873f, 0x04bda12f, 0x04a7904a, 0x04924925, 0x047dc11f, 0x0469ee58, 0x0456c798, 0x04444444, 0x04325c54, 0x04210842, 0x04104104, | |
0x04000000, 0x03f03f04, 0x03e0f83e, 0x03d22635, 0x03c3c3c4, 0x03b5cc0f, 0x03a83a84, 0x039b0ad1, 0x038e38e4, 0x0381c0e0, 0x03759f23, 0x0369d037, 0x035e50d8, 0x03531dec, 0x03483483, 0x033d91d3, | |
0x03333333, 0x03291620, 0x031f3832, 0x03159722, 0x030c30c3, 0x03030303, 0x02fa0be8, 0x02f14990, 0x02e8ba2f, 0x02e05c0c, 0x02d82d83, 0x02d02d03, 0x02c8590b, 0x02c0b02c, 0x02b93105, 0x02b1da46, | |
0x02aaaaab, 0x02a3a0fd, 0x029cbc15, 0x0295fad4, 0x028f5c29, 0x0288df0d, 0x02828283, 0x027c4598, 0x02762762, 0x02702702, 0x026a439f, 0x02647c69, 0x025ed098, 0x02593f6a, 0x0253c825, 0x024e6a17, | |
0x02492492, 0x0243f6f0, 0x023ee090, 0x0239e0d6, 0x0234f72c, 0x02302302, 0x022b63cc, 0x0226b902, 0x02222222, 0x021d9ead, 0x02192e2a, 0x0214d021, 0x02108421, 0x020c49ba, 0x02082082, 0x02040810, | |
0x02000000, 0x01fc07f0, 0x01f81f82, 0x01f4465a, 0x01f07c1f, 0x01ecc07b, 0x01e9131b, 0x01e573ad, 0x01e1e1e2, 0x01de5d6e, 0x01dae607, 0x01d77b65, 0x01d41d42, 0x01d0cb59, 0x01cd8569, 0x01ca4b30, | |
0x01c71c72, 0x01c3f8f0, 0x01c0e070, 0x01bdd2b9, 0x01bacf91, 0x01b7d6c4, 0x01b4e81b, 0x01b20364, 0x01af286c, 0x01ac5702, 0x01a98ef6, 0x01a6d01a, 0x01a41a42, 0x01a16d40, 0x019ec8e9, 0x019c2d15, | |
0x0199999a, 0x01970e50, 0x01948b10, 0x01920fb5, 0x018f9c19, 0x018d3019, 0x018acb91, 0x01886e5f, 0x01861862, 0x0183c978, 0x01818182, 0x017f4060, 0x017d05f4, 0x017ad221, 0x0178a4c8, 0x01767dce, | |
0x01745d17, 0x01724288, 0x01702e06, 0x016e1f77, 0x016c16c1, 0x016a13cd, 0x01681681, 0x01661ec7, 0x01642c86, 0x01623fa7, 0x01605816, 0x015e75bc, 0x015c9883, 0x015ac057, 0x0158ed23, 0x01571ed4, | |
0x01555555, 0x01539095, 0x0151d07f, 0x01501501, 0x014e5e0a, 0x014cab88, 0x014afd6a, 0x0149539e, 0x0147ae14, 0x01460cbc, 0x01446f86, 0x0142d662, 0x01414141, 0x013fb014, 0x013e22cc, 0x013c995a, | |
0x013b13b1, 0x013991c3, 0x01381381, 0x013698df, 0x013521d0, 0x0133ae46, 0x01323e35, 0x0130d190, 0x012f684c, 0x012e025c, 0x012c9fb5, 0x012b404b, 0x0129e413, 0x01288b01, 0x0127350c, 0x0125e227, | |
0x01249249, 0x01234568, 0x0121fb78, 0x0120b471, 0x011f7048, 0x011e2ef4, 0x011cf06b, 0x011bb4a4, 0x011a7b96, 0x01194538, 0x01181181, 0x0116e069, 0x0115b1e6, 0x011485f1, 0x01135c81, 0x0112358e, | |
0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101, | |
0x01000000, | |
}; | |
#endif/*}}}*/ | |
uint32_t divfn(uint16_t i, uint16_t j) | |
{ | |
uint32_t t; | |
uint8_t shift = 0; | |
uint8_t k; | |
if (j <= 1) | |
return (int32_t)i << 16; | |
/* if j is too large to index the table, then we just shift it right until | |
* it's within bounds. In this case, we'll use the fractional part (k) for | |
* linear interpolation, and the integer part for the index. | |
*/ | |
t = (uint32_t)j << 8; | |
while (t >= ((uint32_t)1 << (TABBITS + 8))) | |
{ | |
t >>= 1; | |
shift++; | |
} | |
j = t >> 8; | |
k = (uint8_t)t; | |
/* look up (0x80000000 / j) */ | |
t = table2[j]; | |
if (k) | |
{ | |
/* Linearly interpolate between adjacent table entries. Note that this | |
* works well because the set of values which are large and need to be | |
* shifted down and interpolated represent an especially linear part of | |
* the table. | |
*/ | |
uint16_t tmp; | |
uint32_t n = table2[j + 1] - t; | |
uint32_t m; | |
tmp = (uint16_t)((uint8_t)(n >> 0)) * k; | |
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 8)) * k; | |
m = (uint8_t)tmp; | |
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 16)) * k; | |
m |= (uint32_t)((uint8_t)tmp) << 8; | |
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 24)) * k; | |
m |= (uint32_t)((uint8_t)tmp) << 16; | |
t += m; | |
} | |
/* compute i * (0x100000000/j) >> 16 for a 16.16 fixed-point value i/j. | |
* If we had to shift j right, earlier, then we'll shift the result right | |
* as well, because larger should give us a smaller result. | |
*/ | |
{ | |
uint16_t tmp; | |
uint8_t b[4]; | |
uint16_t hi; | |
#if 0 /* correct, but ineffective */ | |
tmp = ((uint16_t)((uint8_t)i) * (uint8_t)t) >> 8; | |
#else | |
tmp = 0x80; | |
#endif | |
tmp += (uint16_t)((uint8_t)i) * (uint8_t)(t >> 8); | |
hi = (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)t; | |
if (shift < 1) | |
hi += 0x80; | |
tmp += (uint8_t)hi; | |
tmp >>= 8; | |
tmp += hi >> 8; | |
tmp += ((uint8_t)i) * (uint8_t)(t >> 16); | |
b[0] = (uint8_t)tmp; | |
tmp >>= 8; | |
tmp += ((uint8_t)i) * (uint8_t)(t >> 24); | |
b[1] = (uint8_t)tmp; | |
b[2] = (uint8_t)(tmp >> 8); | |
tmp = b[0] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 8); | |
b[0] = (uint8_t)tmp; | |
tmp >>= 8; | |
tmp += b[1] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 16); | |
b[1] = (uint8_t)tmp; | |
tmp >>= 8; | |
tmp += b[2] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 24); | |
b[2] = (uint8_t)tmp; | |
b[3] = (uint8_t)(tmp >> 8); | |
t = b[0] | (uint32_t)b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24; | |
} | |
if (shift == 0) | |
return t; | |
while (shift-- > 1) | |
t >>= 1; | |
t++; | |
return t >> 1; | |
} | |
#endif | |
int main(void) | |
{ | |
unsigned long i, j; | |
uint64_t err = 0; | |
#if TABBITS != 8 | |
printf("static const uint32_t table2[%d + 1] __attribute__((progmem)) = {\n0xffffffff, ", 1 << TABBITS); | |
for (j = 1; j < (1 << TABBITS) + 1; j++) | |
{ | |
table2[j] = (0x100000000u + (j / 2)) / j; | |
printf("0x%08x,%c", table2[j], ((j & 15) == 15) ? '\n' : ' '); | |
} | |
printf("\n}\n"); | |
#endif | |
for (j = 1; j < 65536; j += 3) | |
for (i = 0; i < 65536; i += 3) | |
{ | |
uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j; | |
#if 0 | |
uint32_t tabent = (0x80000000 + (j / 2)) / j; | |
uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15; | |
#else | |
uint32_t test = divfn(i, j); | |
#endif | |
int32_t e = (int32_t)(ref - test); | |
if (e > 1) | |
printf("%u/%u==%lu (0x%lx) (not %lu (0x%lx)) error: %ld\n", (unsigned)i, (unsigned)j, ref, ref, test, test, e); | |
err += abs(e); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment