Created
August 23, 2024 16:08
-
-
Save mratsim/afb82da37287fb79dcd6825acec23f65 to your computer and use it in GitHub Desktop.
GCD bench for https://forum.nim-lang.org/t/12353
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std/[cmdline, times, monotimes, random, sugar, os, typetraits] | |
const | |
EnableTests = false | |
EnableCount = not EnableTests and false | |
EnableBench = not EnableTests | |
template bench(repeat: int; init, body: untyped): untyped = | |
var minTime = initDuration(days = 2) | |
for i in 1 .. repeat: | |
init | |
let start = getMonoTime() | |
body | |
let finish = getMonoTime() | |
minTime = min(minTime, finish - start) | |
# Prevent thermal throttling. | |
sleep(20) | |
echo minTime.inMicroseconds, " micro second" | |
sleep(2000) | |
when EnableCount: | |
var gcdCount = 0 | |
proc gcd*[T](x, y: T): T = | |
# gcd from math module in Nim's stdlib. | |
var (x, y) = (x, y) | |
while y != 0: | |
x = x mod y | |
swap x, y | |
when EnableCount: | |
inc gcdCount | |
abs x | |
when EnableCount: | |
var gcdLARCount = 0 | |
proc gcdLAR*[T: SomeSignedInt](x, y: T): T = | |
# https://en.wikipedia.org/wiki/Euclidean_algorithm#Method_of_least_absolute_remainders | |
# Reduce the number of iterations by using smaller absolute remainders | |
# x and y are always positive | |
var (x, y) = (x.abs, y.abs) | |
while y != 0: | |
#debugEcho x, " mod ", y | |
x = x mod y | |
if x > (y shr 1): | |
# As y is a multiple of the gcd, you can add or subtract y to x | |
# You can multiply -1 to x as -x is still multiple of the gcd. | |
x = y - x | |
swap x, y | |
when EnableCount: | |
inc gcdLARCount | |
abs x | |
when EnableCount: | |
var gcdLAR2Count = 0 | |
proc gcdLAR2*[T: SomeSignedInt](x, y: T): T = | |
# gcdLAR with many branches. | |
var (x, y) = (x, y) | |
# x and y can become negative | |
while y != 0: | |
#debugEcho x, " mod ", y | |
if x > 0: | |
x = x mod y | |
if y > 0: | |
if x > y div 2: | |
dec x, y | |
else: | |
if x > -y div 2: | |
inc x, y | |
else: | |
x = x mod y | |
if y > 0: | |
if x < -y div 2: | |
inc x, y | |
else: | |
if x < y div 2: | |
dec x, y | |
swap x, y | |
when EnableCount: | |
inc gcdLAR2Count | |
abs x | |
when EnableCount: | |
var gcdLAR3Count = 0 | |
proc gcdLAR3*[T: SomeInteger](x, y: T): T = | |
type UT = T.toUnsigned | |
# x and y are always positive | |
var (x, y) = (x.abs, y.abs) | |
while y != 0: | |
# Make x mod y returns -y/2 < r <= y/2 | |
let z = (y - 1) div 2 | |
# x + z can overflow. So convert them to unsigned. | |
x = abs(((x.UT + z.UT) mod y.UT).int - z) | |
swap x, y | |
when EnableCount: | |
inc gcdLAR3Count | |
abs x | |
when EnableCount: | |
var gcdLAR4Count = 0 | |
proc gcdLAR4*[T](x, y: T): T = | |
# x and y are always positive | |
var (x, y) = (x.abs, y.abs) | |
while y != 0: | |
#debugEcho x, " mod ", y | |
x = x mod y | |
# As y is a multiple of the gcd, you can add or subtract y to x | |
# You can multiply -1 to x as -x is still multiple of the gcd. | |
x = min(x, y - x) | |
swap x, y | |
when EnableCount: | |
inc gcdLAR4Count | |
abs x | |
import std/bitops | |
when EnableCount: | |
var gcdSubCount = 0 | |
proc gcdSub*[T: SomeInteger](x, y: T): T = | |
# Same to gcd in https://github.com/nim-lang/bigints | |
var | |
(x, y) = (x.abs, y.abs) | |
let | |
i = countTrailingZeroBits(x) | |
j = countTrailingZeroBits(y) | |
let lg2 = min(i, j) | |
x = x shr i | |
y = y shr j | |
# Both x and y are odd now | |
while true: | |
if y > x: | |
swap x, y | |
x -= y | |
if x == 0: | |
return y shl lg2 | |
# odd - odd = even | |
x = x shr countTrailingZeroBits(x) | |
when EnableCount: | |
inc gcdSubCount | |
when EnableCount: | |
var gcdSub2Count = 0 | |
proc gcdSub2*[T: SomeInteger](x, y: T): T = | |
# Same to gcdSub excepts using `mod` instead of subtraction | |
# to reduce the number of iterations. | |
var | |
(x, y) = (x.abs, y.abs) | |
let | |
i = countTrailingZeroBits(x) | |
j = countTrailingZeroBits(y) | |
let lg2 = min(i, j) | |
x = x shr i | |
y = y shr j | |
if y > x: | |
swap x, y | |
while true: | |
x = x mod y | |
if x == 0: | |
return y shl lg2 | |
x = x shr countTrailingZeroBits(x) | |
swap x, y | |
when EnableCount: | |
inc gcdSub2Count | |
func gcd_impulse(u, v: int): int = | |
# From https://github.com/SciNim/impulse/blob/49b813232507470a047727712acda105b84c7815/impulse/fft/factorization.nim#L23-L39 | |
if u == 0: return v | |
if v == 0: return u | |
let shift = countTrailingZeroBits(u or v) | |
var u = u shr u.countTrailingZeroBits() | |
var v = v | |
while true: | |
v = v shr v.countTrailingZeroBits() | |
if u > v: | |
swap(u, v) | |
v = v - u | |
if v == 0: | |
return u shl shift | |
when EnableTests: | |
template test(gcdProc: typed) = | |
doAssert gcdProc(1, 1) == 1 | |
doAssert gcdProc(2, 1) == 1 | |
doAssert gcdProc(3, 1) == 1 | |
doAssert gcdProc(2, 2) == 2 | |
doAssert gcdProc(3, 2) == 1 | |
doAssert gcdProc(4, 2) == 2 | |
doAssert gcdProc(5, 2) == 1 | |
doAssert gcdProc(3, 3) == 3 | |
doAssert gcdProc(4, 3) == 1 | |
doAssert gcdProc(5, 3) == 1 | |
doAssert gcdProc(6, 3) == 3 | |
doAssert gcdProc(10, 6) == 2 | |
doAssert gcdProc(60, 33) == 3 | |
doAssert gcdProc(121, 11) == 11 | |
doAssert gcdProc(1024 * 13, 1024 * 17) == 1024 | |
doAssert gcdProc(122, 11) == 1 | |
doAssert gcdProc(104, 66) == 2 | |
doAssert gcdProc(8191 * 2, 3 * 2) == 2 | |
doAssert gcdProc(8191 * 7, 15 * 7) == 7 | |
doAssert gcdProc(131071, 8191) == 1 | |
doAssert gcdProc(131071 * 5 * 3 * 3, 127 * 11 * 7 * 3) == 3 | |
doAssert gcdProc(131071 * 11 * 7 * 5 * 2, 127 * 19 * 17 * 13 * 2) == 2 | |
doAssert gcdProc(131071 * 11 * 7 * 5 * 7, 127 * 19 * 17 * 13 * 7) == 7 | |
doAssert gcdProc(131071 * 11 * 7 * 5 * 8191, 127 * 19 * 17 * 13 * 8191) == 8191 | |
doAssert gcdProc(int.high, int.high) == int.high | |
doAssert gcdProc(int.high, int.high - 1) == 1 | |
doAssert gcdProc(int.high - 1, int.high - 3) == 2 | |
doAssert gcdProc(int.high, int.high div 2) == 1 | |
test(gcdLAR) | |
test(gcdLAR2) | |
test(gcdLAR3) | |
test(gcdLAR4) | |
test(gcdSub) | |
test(gcdSub2) | |
test(gcd_impulse) | |
when EnableBench: | |
proc runBench = | |
let seed = paramCount() or 123 | |
var | |
rand: Rand | |
res0 = 0 | |
stdout.write "gcd in stdlib: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res0 = res0 xor gcd(x, y) | |
var res1 = 0 | |
stdout.write "gcdLAR: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res1 = res1 xor gcdLAR(x, y) | |
var res2 = 0 | |
stdout.write "gcdLAR2: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res2 = res2 xor gcdLAR2(x, y) | |
var res3 = 0 | |
stdout.write "gcdLAR3: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res3 = res3 xor gcdLAR3(x, y) | |
var res4 = 0 | |
stdout.write "gcdLAR4: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res4 = res4 xor gcdLAR4(x, y) | |
var res5 = 0 | |
stdout.write "gcdSub: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res5 = res5 xor gcdSub(x, y) | |
var res6 = 0 | |
stdout.write "gcdSub2: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res6 = res6 xor gcdSub2(x, y) | |
var res7 = 0 | |
stdout.write "gcd_impulse: " | |
bench(101): | |
rand = seed.initRand() | |
do: | |
for i in 0 .. 10000: | |
var (x, y) = (rand.rand(1 .. int.high), rand.rand(1 .. int.high)) | |
if x < y: | |
swap x, y | |
res7 = res7 xor gcd_impulse(x, y) | |
when EnableCount: | |
dump gcdCount | |
dump gcdLARCount | |
dump gcdLAR2Count | |
dump gcdLAR3Count | |
dump gcdLAR4Count | |
dump gcdSubCount | |
dump gcdSub2Count | |
dump gcd_impulse | |
echo res0, res1, res2, res3, res4, res5, res6, res7 | |
doAssert res0 == res1 and res0 == res2 and res0 == res3 and res0 == res4 and res0 == res5 and res0 == res6 and res0 == res7 | |
runBench() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment