Last active
June 22, 2023 17:25
-
-
Save flyingmutant/2ecab98589800e2d682728d1ec4cd585 to your computer and use it in GitHub Desktop.
Lemire's algorithm vs fixed-point multiplication
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
goos: linux | |
goarch: amd64 | |
pkg: rand | |
cpu: Intel(R) Core(TM) i7-10510U CPU @ 1.80GHz | |
BenchmarkUint32n/fp/small-8 721959876 1.586 ns/op | |
BenchmarkUint32n/fp/midrange-8 886869387 1.376 ns/op | |
BenchmarkUint32n/fp/big-8 843097094 1.520 ns/op | |
BenchmarkUint32n/fp/rand-8 565216611 2.137 ns/op | |
BenchmarkUint32n/lemire/small-8 474396974 3.189 ns/op | |
BenchmarkUint32n/lemire/midrange-8 472477327 3.216 ns/op | |
BenchmarkUint32n/lemire/big-8 153740991 8.273 ns/op | |
BenchmarkUint32n/lemire/rand-8 100000000 12.32 ns/op | |
BenchmarkUint64n/fp/small-8 630359661 2.166 ns/op | |
BenchmarkUint64n/fp/midrange-8 311503485 3.272 ns/op | |
BenchmarkUint64n/fp/big-8 377017726 3.210 ns/op | |
BenchmarkUint64n/fp/rand-8 259585053 4.295 ns/op | |
BenchmarkUint64n/lemire/small-8 483717476 2.316 ns/op | |
BenchmarkUint64n/lemire/midrange-8 491123155 2.536 ns/op | |
BenchmarkUint64n/lemire/big-8 100000000 11.00 ns/op | |
BenchmarkUint64n/lemire/rand-8 73536332 15.92 ns/op |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rand | |
import ( | |
"math" | |
"math/bits" | |
"testing" | |
) | |
var ( | |
wyrandState uint64 | |
uint32Sink uint32 | |
uint64Sink uint64 | |
) | |
func wyrand64(state *uint64) uint64 { | |
s := *state + 0xa0761d6478bd642f | |
*state = s | |
hi, lo := bits.Mul64(s, s^0xe7037ed1a0b428db) | |
return hi ^ lo | |
} | |
func Uint64() uint64 { | |
return wyrand64(&wyrandState) | |
} | |
func Uint32() uint32 { | |
return uint32(Uint64() >> 32) | |
} | |
func Uint32nFixedPoint(n uint32) uint32 { | |
res, _ := bits.Mul64(Uint64(), uint64(n)) | |
return uint32(res) | |
} | |
func Uint32nLemire(n uint32) uint32 { | |
hi, lo := bits.Mul32(Uint32(), n) | |
if lo < n { | |
thresh := (-n) % n | |
for lo < thresh { | |
hi, lo = bits.Mul32(Uint32(), n) | |
} | |
} | |
return hi | |
} | |
func Uint64nFixedPoint(n uint64) uint64 { | |
res, frac := bits.Mul64(Uint64(), n) | |
if n <= math.MaxUint32 { | |
return res | |
} | |
hi, _ := bits.Mul64(Uint64(), n) | |
_, carry := bits.Add64(frac, hi, 0) | |
return res + carry | |
} | |
func Uint64nLemire(n uint64) uint64 { | |
hi, lo := bits.Mul64(Uint64(), n) | |
if lo < n { | |
thresh := (-n) % n | |
for lo < thresh { | |
hi, lo = bits.Mul64(Uint64(), n) | |
} | |
} | |
return hi | |
} | |
func BenchmarkUint32n(b *testing.B) { | |
b.Run("fp/small", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nFixedPoint(uint32(i) + 1) | |
} | |
uint32Sink = s | |
}) | |
b.Run("fp/midrange", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nFixedPoint(math.MaxUint16 + uint32(i)) | |
} | |
uint32Sink = s | |
}) | |
b.Run("fp/big", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nFixedPoint(math.MaxInt32 - uint32(i)) | |
} | |
uint32Sink = s | |
}) | |
b.Run("fp/rand", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nFixedPoint(Uint32() | 1) | |
} | |
uint32Sink = s | |
}) | |
b.Run("lemire/small", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nLemire(uint32(i) + 1) | |
} | |
uint32Sink = s | |
}) | |
b.Run("lemire/midrange", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nLemire(math.MaxUint16 + uint32(i)) | |
} | |
uint32Sink = s | |
}) | |
b.Run("lemire/big", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nLemire(math.MaxInt32 - uint32(i)) | |
} | |
uint32Sink = s | |
}) | |
b.Run("lemire/rand", func(b *testing.B) { | |
var s uint32 | |
for i := 0; i < b.N; i++ { | |
s += Uint32nLemire(Uint32() | 1) | |
} | |
uint32Sink = s | |
}) | |
} | |
func BenchmarkUint64n(b *testing.B) { | |
b.Run("fp/small", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nFixedPoint(uint64(i) + 1) | |
} | |
uint64Sink = s | |
}) | |
b.Run("fp/midrange", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nFixedPoint(math.MaxUint32 + uint64(i)) | |
} | |
uint64Sink = s | |
}) | |
b.Run("fp/big", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nFixedPoint(math.MaxInt64 - uint64(i)) | |
} | |
uint64Sink = s | |
}) | |
b.Run("fp/rand", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nFixedPoint(Uint64() | 1) | |
} | |
uint64Sink = s | |
}) | |
b.Run("lemire/small", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nLemire(uint64(i) + 1) | |
} | |
uint64Sink = s | |
}) | |
b.Run("lemire/midrange", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nLemire(math.MaxUint32 + uint64(i)) | |
} | |
uint64Sink = s | |
}) | |
b.Run("lemire/big", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nLemire(math.MaxInt64 - uint64(i)) | |
} | |
uint64Sink = s | |
}) | |
b.Run("lemire/rand", func(b *testing.B) { | |
var s uint64 | |
for i := 0; i < b.N; i++ { | |
s += Uint64nLemire(Uint64() | 1) | |
} | |
uint64Sink = s | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment