Created
August 19, 2020 09:33
-
-
Save Eisenwave/b8e9311d4da2b317a0f5b0af6640b09b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* Long division, unsigned (64/32 ==> 32). This procedure performs unsigned "long division" i.e., division of a 64-bit unsigned dividend by a 32-bit unsigned divisor, producing a 32-bit quotient. In the overflow cases (divide by 0, or quotient exceeds 32 bits), it returns a remainder of 0xFFFFFFFF (an impossible value). The dividend is u1 and u0, with u1 being the most significant word. The divisor is parameter v. The value returned is the quotient. Max line length is 57, to fit in hacker.book. */ | |
#define max(x, y)((x) > (y) ? (x) : (y)) | |
int nlz(unsigned x) { | |
int n; | |
if (x == 0) return (32); | |
n = 0; | |
if (x <= 0x0000FFFF) { | |
n = n + 16; | |
x = x << 16; | |
} | |
if (x <= 0x00FFFFFF) { | |
n = n + 8; | |
x = x << 8; | |
} | |
if (x <= 0x0FFFFFFF) { | |
n = n + 4; | |
x = x << 4; | |
} | |
if (x <= 0x3FFFFFFF) { | |
n = n + 2; | |
x = x << 2; | |
} | |
if (x <= 0x7FFFFFFF) { | |
n = n + 1; | |
} | |
return n; | |
} | |
unsigned myrand(void) { // Expands rand() to 32 bits. | |
return rand() ^ (rand() << 1); | |
} /* This first version is Knuth's Algorithm D (my "divmnu") pretty much mechanically reduced to the special case m = 4, n = 2. But some details differ. The parameters are fullwords passed in registers, and it returns an error indication if the result cannot be expressed in 32 bits (divmnu would return a 3-halfword quotient). */ | |
unsigned divlu(unsigned u1, unsigned u0, unsigned v, unsigned * r) { | |
const unsigned b = 65536; // Number base (16 bits). | |
unsigned short un[4], // Dividend and divisor | |
vn[2]; // normalized and broken | |
// up into halfwords. | |
unsigned short q[2]; // Quotient as halfwords. | |
unsigned un1, un0, // Dividend and divisor vn0; // as fullwords. | |
unsigned qhat; // Estimated quotient digit. | |
unsigned rhat; // A remainder. | |
unsigned p; // Product of two digits. | |
int s, i, j, t, k; | |
if (u1 >= v) { // If overflow, set rem. | |
if (r != NULL) // to an impossible value, | |
* | |
r = 0xFFFFFFFF; // and return the largest | |
return 0xFFFFFFFF; | |
} // possible quotient. | |
s = nlz(v); // 0 <= s <= 31. | |
vn0 = v << s; // Normalize divisor. | |
vn[1] = vn0 >> 16; // Break divisor up into | |
vn[0] = vn0 & 0xFFFF; // two 16-bit halves. | |
// Shift dividend left. | |
un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); | |
un0 = u0 << s; | |
un[3] = un1 >> 16; // Break dividend up into | |
un[2] = un1; // four 16-bit halfwords | |
un[1] = un0 >> 16; // Note: storing into | |
un[0] = un0; // halfwords truncates. | |
for (j = 1; j >= 0; j--) { // Compute estimate qhat of | |
q[j].qhat = (un[j + 2] * b + un[j + 1]) / vn[1]; | |
rhat = (un[j + 2] * b + un[j + 1]) - qhat * vn[1]; | |
again: if (qhat >= b || qhat * vn[0] > b * rhat + un[j]) { | |
qhat = qhat - 1; | |
rhat = rhat + vn[1]; | |
if (rhat < b) goto again; | |
} | |
// Multiply and subtract. | |
k = 0; | |
for (i = 0; i < 2; i++) { | |
p = qhat * vn[i]; | |
t = un[i + j] - k - (p & 0xFFFF); | |
un[i + j] = t; | |
k = (p >> 16) - (t >> 16); | |
} | |
t = un[j + 2] - k; | |
un[j + 2] = t; | |
q[j] = qhat; // Store quotient digit. | |
if (t < 0) { // If we subtracted too | |
q[j] = q[j] - 1; // much, add back. | |
printf("Doing add back, %08x %08x %08x\n", u1, u0, v); | |
k = 0; | |
for (i = 0; i < 2; i++) { | |
t = un[i + j] + vn[i] + k; | |
un[i + j] = t; | |
k = t >> 16; | |
} | |
un[j + 2] = un[j + 2] + k; | |
} | |
} // End j. | |
if (r != NULL) // If remainder is wanted, | |
* | |
r = (un[1] * b + un[0]) >> s; // return it. | |
return q[1] * b + q[0]; | |
} /* This version is simplified from Knuth's because the adding back step is never required, as the estimated quotient digit qhat is always exact. Also, the inner loop is coded in a more efficient way (not possible in divmnu), unrolled, and obvious resulting simplifications made. And, the quotient and divisor are elementary variables (kept in registers) rather than arrays. */ | |
unsigned divlu1(unsigned u1, unsigned u0, unsigned v, unsigned * r) { | |
const unsigned b = 65536; // Number base (16 bits). | |
unsigned short un[4], // Dividend and divisor | |
vn1, vn0; // normalized and broken // up into halfwords. | |
unsigned q; // Quotient. | |
unsigned un1, un0; // Dividend as fullwords. | |
unsigned qhat; // Estimated quotient digit. | |
unsigned rhat; // A remainder. | |
unsigned p; // Product of two digits. | |
int s, j, t, k; | |
if (u1 >= v) { // If overflow, set rem. | |
if (r != NULL) // to an impossible value, | |
* | |
r = 0xFFFFFFFF; // and return the largest | |
return 0xFFFFFFFF; | |
} // possible quotient. | |
s = nlz(v); // 0 <= s <= 31. | |
v = v << s; // Normalize divisor. | |
vn1 = v >> 16; // Break divisor up into | |
vn0 = v & 0xFFFF; // two 16-bit halves. // Shift dividend left. | |
un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); | |
un0 = u0 << s; | |
un[3] = un1 >> 16; // Break dividend up into | |
un[2] = un1; // four 16-bit halfwords | |
un[1] = un0 >> 16; // Note: storing into | |
un[0] = un0; // halfwords truncates. | |
for (j = 1; j >= 0; j--) { // Compute estimate qhat of current digit of q. | |
qhat = (un[j + 2] * b + un[j + 1]) / vn1; | |
rhat = (un[j + 2] * b + un[j + 1]) - qhat * vn1; | |
again: if (qhat >= b || qhat * vn0 > b * rhat + un[j]) { | |
qhat = qhat - 1; | |
rhat = rhat + vn1; | |
if (rhat < b) goto again; | |
} q = q * b + qhat; // Put away quotient digit. // Multiply and subtract. | |
p = qhat * vn0; | |
t = un[j] - p; | |
un[j] = t; // Store right half of t. | |
k = t >> 16; | |
p = qhat * vn1; | |
t = un[j + 1] + k - p; | |
un[j + 1] = t; // Store right half of t. | |
} // End j. | |
if (r != NULL) // If remainder is wanted, | |
* | |
r = (un[1] * b + un[0]) >> s; // return it. | |
return q; | |
} /* This version is divlu1 except with outer loop unrolled, and array un changed into local variables. Several of the variables below could be "short," but having them fullwords gives better code on gcc/Intel. Statistics: Based on one million executions of this program, with uniformly distributed random values for the dividend and divisor, the number of times in each loop per execution of the program is: again1: 0.4060 again2: 0.3469 This is the version that's in the hacker book. */ | |
// ------------------------------ cut ---------------------------------- | |
unsigned divlu2(unsigned u1, unsigned u0, unsigned v, unsigned * r) { | |
const unsigned b = 65536; // Number base (16 bits). | |
unsigned un1, un0, // Norm. dividend LSD's. | |
vn1, vn0, // Norm. divisor digits. | |
q1, q0, // Quotient digits. | |
un32, un21, un10, // Dividend digit pairs. | |
rhat; // A remainder. | |
int s; // Shift amount for norm. | |
if (u1 >= v) { // If overflow, set rem. | |
if (r != NULL) // to an impossible value, | |
* | |
r = 0xFFFFFFFF; // and return the largest | |
eturn 0xFFFFFFFF; | |
} // possible quotient. | |
s = nlz(v); // 0 <= s <= 31. | |
v = v << s; // Normalize divisor. | |
vn1 = v >> 16; // Break divisor up into | |
vn0 = v & 0xFFFF; // two 16-bit digits. | |
un32 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); | |
un10 = u0 << s; // Shift dividend left. | |
un1 = un10 >> 16; // Break right half of | |
un0 = un10 & 0xFFFF; // dividend into two digits. | |
q1 = un32 / vn1; // Compute the first | |
rhat = un32 - q1 * vn1; // quotient digit, | |
q1.again1: if (q1 >= b || q1 * vn0 > b * rhat + un1) { | |
q1 = q1 - 1; | |
rhat = rhat + vn1; | |
if (rhat < b) goto again1; | |
} un21 = un32 * b + un1 - q1 * v; // Multiply and subtract. | |
q0 = un21 / vn1; // Compute the second | |
rhat = un21 - q0 * vn1; // quotient digit, q0. again2: | |
if (q0 >= b || q0 * vn0 > b * rhat + un0) { | |
q0 = q0 - 1; | |
rhat = rhat + vn1; | |
if (rhat < b) goto again2; | |
} | |
if (r != NULL) // If remainder is wanted, | |
* | |
r = (un21 * b + un0 - q0 * v) >> s; // return it. | |
return q1 * b + q0; | |
} | |
// ------------------------------ cut ---------------------------------- | |
int errors; | |
void check(char * name, unsigned q, unsigned r, unsigned u1, unsigned u0, unsigned v, unsigned cq, unsigned cr) { | |
if (q != cq || r != cr) { | |
errors = errors + 1; | |
printf("Error in %s, dividend u = %08x %08x\n", name, u1, u0); | |
printf(" divisor v = %08x\n", v); | |
printf("Got q = %08x, r = %08x\n", q, r); | |
printf("Should get cq = %08x, cr = %08x\n", cq, cr); | |
} | |
return; | |
} | |
int main() { | |
static unsigned test[] = { // u..., v..., | |
0, | |
0, | |
0, // Overflow. | |
1, | |
0, | |
0, // Overflow. | |
1, | |
0, | |
1, // Overflow. | |
0xffffffff, | |
0, | |
0xffffffff, // Overflow. | |
0, | |
1, | |
1, | |
0, | |
7, | |
3, | |
0x00000000, | |
0xffffffff, | |
0x00000001, | |
0x01234567, | |
0x89abcdef, | |
0x01234568, | |
0x01234567, | |
0x89abcdef, | |
0x12345678, | |
0x01234567, | |
0x89abcdef, | |
0x80000000, | |
0x01234567, | |
0x89abcdef, | |
0x80000001, | |
0x01234567, | |
0x89abcdef, | |
0x80007fff, | |
0x01234567, | |
0x89abcdef, | |
0x80008000, | |
0x01234567, | |
0x89abcdef, | |
0x80008001, | |
0x01234567, | |
0x89abcdef, | |
0x8000ffff, | |
0x00000000, | |
0x00000000, | |
0x0000ffff, | |
0x00000000, | |
0x0000ffff, | |
0x0000ffff, | |
0x00000000, | |
0xffff0000, | |
0x0000ffff, | |
0x00000000, | |
0xffffffff, | |
0x0000ffff, | |
0x0000fffe, | |
0x00000000, | |
0x0000ffff, | |
0x0000fffe, | |
0x0000ffff, | |
0x0000ffff, | |
0x0000fffe, | |
0xffff0000, | |
0x0000ffff, | |
0x0000fffe, | |
0xffffffff, | |
0x0000ffff, | |
0x00000000, | |
0x00000000, | |
0xffff0000, | |
0x00000000, | |
0x0000ffff, | |
0xffff0000, | |
0x00000000, | |
0xffff0000, | |
0xffff0000, | |
0x00000000, | |
0xffffffff, | |
0xffff0000, | |
0x0000ffff, | |
0x00000000, | |
0xffff0000, | |
0x0000ffff, | |
0x0000ffff, | |
0xffff0000, | |
0x0000ffff, | |
0xffff0000, | |
0xffff0000, | |
0x0000ffff, | |
0xffffffff, | |
0xffff0000, | |
0xfffe0000, | |
0x00000000, | |
0xffff0000, | |
0xfffe0000, | |
0x0000ffff, | |
0xffff0000, | |
0xfffe0000, | |
0xffff0000, | |
0xffff0000, | |
0xfffe0000, | |
0xffffffff, | |
0xffff0000, | |
0xfffeffff, | |
0x00000000, | |
0xffff0000, | |
0xfffeffff, | |
0x0000ffff, | |
0xffff0000, | |
0xfffeffff, | |
0xffff0000, | |
0xffff0000, | |
0xfffeffff, | |
0xffffffff, | |
0xffff0000, | |
0x00000000, | |
0x00000000, | |
0xffffffff, | |
0x00000000, | |
0x0000ffff, | |
0xffffffff, | |
0x00000000, | |
0xffff0000, | |
0xffffffff, | |
0x00000000, | |
0xffffffff, | |
0xffffffff, | |
0x0000ffff, | |
0x00000000, | |
0xffffffff, | |
0x0000ffff, | |
0x0000ffff, | |
0xffffffff, | |
0x0000ffff, | |
0xffff0000, | |
0xffffffff, | |
0x0000ffff, | |
0xffffffff, | |
0xffffffff, | |
0xffff0000, | |
0x00000000, | |
0xffffffff, | |
0xffff0000, | |
0x0000ffff, | |
0xffffffff, | |
0xffff0000, | |
0xffff0000, | |
0xffffffff, | |
0xffff0000, | |
0xffffffff, | |
0xffffffff, | |
0xfffffffe, | |
0x00000000, | |
0xffffffff, | |
0xfffffffe, | |
0x0000ffff, | |
0xffffffff, | |
0xfffffffe, | |
0xffff0000, | |
0xffffffff, | |
0xfffffffe, | |
0xffffffff, | |
0xffffffff, | |
0x12345678, | |
0x9abcdef0, | |
0x12345679, | |
0x00010000, | |
0x00000000, | |
0x00010001, | |
0x80008000, | |
0x00000000, | |
0x80008001, // First qhat = b + 1. | |
0x8000fffe, | |
0xffffffff, | |
0x8000ffff, // First qhat = b + 1. | |
0x7fff8000, | |
0x00000000, | |
0x8000ffff, // First qhat too big by 2. | |
}; | |
int i, n; | |
unsigned u1, u0, v, q, r, cq, cr; | |
n = sizeof(test) / 12 + 10; | |
for (i = 0; i < n; i++) { | |
if (i < sizeof(test) / 12) { | |
u1 = test[3 * i]; | |
u0 = test[3 * i + 1]; | |
v = test[3 * i + 2]; | |
} else { | |
u1 = myrand(); | |
u0 = myrand(); | |
v = myrand(); | |
if (v == 0) v = 1; | |
if (u1 >= v) u1 = u1 & (v - 1); | |
} | |
if (i <= 3) { // If intentional overflow case: | |
cq = 0xFFFFFFFF;; | |
cr = 0xFFFFFFFF; | |
} else { // Non-overflow case: | |
cq = ((unsigned long long) u1 * 0x100000000 LL + u0) / v; | |
cr = ((unsigned long long) u1 * 0x100000000 LL + u0) - (unsigned long long) cq * v; | |
} // | |
printf("u = %08x %08x v = %08x cq = %08x cr = %08x\r\n", u1, u0, v, cq, cr); | |
q = divlu(u1, u0, v, & r); | |
check("divlu", q, r, u1, u0, v, cq, cr); | |
q = divlu1(u1, u0, v, & r); | |
check("divlu1", q, r, u1, u0, v, cq, cr); | |
q = divlu2(u1, u0, v, & r); | |
check("divlu2", q, r, u1, u0, v, cq, cr); | |
} | |
if (errors == 0) printf("Passed all %d cases.\n", n); | |
else printf("Got %d errors out of %d cases.\n", errors, n); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment