Created
March 26, 2018 19:05
-
-
Save mratsim/032b0388536690bbb427cd5cb1ebeef3 to your computer and use it in GitHub Desktop.
(WIP) integer division by Burnikel and Zeigler in Nim
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| proc div3n2n(a12, a3, b, b1, b2: BaseUint, n: int): tuple[quot, rem: BaseUint] {.noinit, noSideEffect.} | |
| # Forward declaration | |
| proc div2n1n[T, U: BaseUint](a: T, b: U, n: int): tuple[quot, rem: U] {.noinit, noSideEffect.}= | |
| ## Divide a 2n-bit uint with a n-bit uint. | |
| ## This is part of the implementation of | |
| ## a recursive asymptotically fast division algorithm by Burnikel and Ziegler | |
| if n <= 1000: | |
| return divmod(a, b) | |
| let pad = bool(n and 1) | |
| var | |
| a = a | |
| b = b | |
| n = n | |
| if pad: # Make sure we respect the algorithm preconditions | |
| # i.e for a 64-bit int, 2^64/2 <= b < 2^64 | |
| a = a shl 1 | |
| b = b shl 1 | |
| n += 1 | |
| let | |
| half_n = n div 2 | |
| mask = (one(T) shl half_n) - one(T) | |
| let | |
| b1 = b shr half_n # due to the padding if we normalize this is not always b.hi | |
| b2 = b and mask | |
| q1r = div3n2n(a shr n, (a shr n) and mask, b, b1, b2, half_n) | |
| q2s = div3n2n(q1r.rem, a and mask, b, b1, b2, half_n) | |
| if pad: | |
| result.rem = q2s.rem shr 1 | |
| else: | |
| result.rem = q2s.rem | |
| result.quot = (q1r.quot shl half_n) or q2s.quot | |
| proc div3n2n(a12, a3, b, b1, b2: BaseUint, n: int): tuple[quot, rem: BaseUint] {.noinit, noSideEffect.}= | |
| ## Divides 3 halves by 2 | |
| const one1 = one(type a12) | |
| if a12 shr n == b1: | |
| let hi = one1 shl n - one1 | |
| result.quot = hi | |
| result.rem = a12 - (b1 shl n) + b1 | |
| else: | |
| result = div2n1n(a12, b1, n) | |
| var ca3 = (result.rem shl n or a3) | |
| let d = result.quot * b2 | |
| # Due to padding we are off by at most 2 | |
| if ca3 < d: | |
| result.quot -= one1 | |
| ca3 += b | |
| if ca3 < d: | |
| result.quot -= one1 | |
| ca3 += b | |
| result.rem = ca3 - d | |
| proc divmod_BZ*(x, y: MpUintImpl): tuple[quot, rem: MpUintImpl] {.noinit, noSideEffect.}= | |
| ## We use grade-school div to split a into "digits" in base 2^n, n = bit_length(b) | |
| type T = type x | |
| let | |
| n = y.bit_length - 2 | |
| mask = (one(T) shl n) - one(T) | |
| var x = x | |
| var x_digits = zero(T) | |
| while not x.isZero: | |
| x_digits = x_digits shl n | |
| x_digits += x and mask | |
| x = x shr n | |
| let unit = x_digits and mask | |
| if unit >= y: | |
| result.rem = zero(T) | |
| else: | |
| result.rem = unit | |
| x_digits = x_digits shr n | |
| result.quot = zero(T) | |
| var q_digit: T | |
| while not x_digits.isZero: | |
| (q_digit, result.rem) = div2n1n((result.rem shl n) + (x_digits and mask), y, n) | |
| x_digits = x_digits shr n | |
| result.quot = (result.quot shl n) + q_digit | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment