Last active
January 13, 2017 08:09
-
-
Save wilzbach/74ffebc236d3a9c10bcae730fc1571fa to your computer and use it in GitHub Desktop.
std.random.uniform vs. bitwise
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> ldc -O5 -release -boundscheck=off test.d && ./test 1 2 8 256 1024 65536 | |
benchmark: 1 (bits: 1) | |
random.uniform = 742 ms, 824 μs, and 5 hnsecs | |
random.bitwise = 218 ms, 972 μs, and 9 hnsecs | |
random.uniform.fixed = 592 ms, 647 μs, and 2 hnsecs | |
benchmark: 2 (bits: 2) | |
random.uniform = 746 ms, 839 μs, and 5 hnsecs | |
random.bitwise = 497 ms and 189 μs | |
random.uniform.fixed = 594 ms, 196 μs, and 1 hnsec | |
benchmark: 7 (bits: 3) | |
random.uniform = 747 ms, 499 μs, and 6 hnsecs | |
random.bitwise = 712 ms, 791 μs, and 7 hnsecs | |
random.uniform.fixed = 595 ms, 432 μs, and 4 hnsecs | |
benchmark: 255 (bits: 8) | |
random.uniform = 746 ms, 915 μs, and 1 hnsec | |
random.bitwise = 1 sec, 787 ms, 508 μs, and 2 hnsecs | |
random.uniform.fixed = 592 ms, 198 μs, and 7 hnsecs | |
benchmark: 1023 (bits: 10) | |
random.uniform = 743 ms and 597 μs | |
random.bitwise = 2 secs, 208 ms, 421 μs, and 5 hnsecs | |
random.uniform.fixed = 593 ms, 368 μs, and 6 hnsecs | |
benchmark: 65535 (bits: 16) | |
random.uniform = 743 ms, 132 μs, and 7 hnsecs | |
random.bitwise = 3 secs, 483 ms, 936 μs, and 3 hnsecs | |
random.uniform.fixed = 592 ms, 316 μs, and 4 hnsecs | |
> dmd -O -release -boundscheck=off test.d && ./test 1 2 8 256 1024 65536 | |
benchmark: 1 (bits: 1) | |
random.uniform = 1 sec, 246 ms, 722 μs, and 9 hnsecs | |
random.bitwise = 630 ms, 657 μs, and 7 hnsecs | |
random.uniform.fixed = 1 sec, 236 ms, 366 μs, and 6 hnsecs | |
benchmark: 2 (bits: 2) | |
random.uniform = 1 sec, 239 ms, 266 μs, and 8 hnsecs | |
random.bitwise = 1 sec, 241 ms, 494 μs, and 5 hnsecs | |
random.uniform.fixed = 1 sec, 238 ms, 637 μs, and 3 hnsecs | |
benchmark: 7 (bits: 3) | |
random.uniform = 1 sec, 281 ms, 351 μs, and 6 hnsecs | |
random.bitwise = 1 sec, 808 ms, 20 μs, and 9 hnsecs | |
random.uniform.fixed = 1 sec, 236 ms, 833 μs, and 2 hnsecs | |
benchmark: 254 (bits: 8) | |
random.uniform = 1 sec, 242 ms, 289 μs, and 6 hnsecs | |
random.bitwise = 4 secs, 745 ms, 717 μs, and 2 hnsecs | |
random.uniform.fixed = 1 sec, 247 ms, 597 μs, and 6 hnsecs | |
benchmark: 1023 (bits: 10) | |
random.uniform = 1 sec, 245 ms, 6 μs, and 6 hnsecs | |
random.bitwise = 6 secs, 296 ms, 748 μs, and 9 hnsecs | |
random.uniform.fixed = 1 sec, 237 ms, 685 μs, and 5 hnsecs | |
benchmark: 65535 (bits: 16) | |
random.uniform = 1 sec, 245 ms, 286 μs, and 7 hnsecs | |
random.bitwise = 9 secs, 638 ms, 437 μs, and 1 hnsec | |
random.uniform.fixed = 1 sec, 239 ms, 99 μs, and 5 hnsecs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.algorithm, std.conv, std.datetime, std.functional, std.math, std.range, std.random, std.stdio, std.traits, std.typecons; | |
private void doNotOptimizeAway(T)(auto ref T t) | |
{ | |
import core.thread : getpid; | |
import std.stdio : writeln; | |
if(getpid() == 1) { | |
writeln(*cast(char*)&t); | |
} | |
} | |
void main(string[] args) | |
{ | |
assert(args.length > 1); | |
args[1..$].each!(x => x.to!int.test); | |
} | |
void test(int k) | |
{ | |
auto n = 50_000; | |
k = k > 2 ? k - 1 : k; | |
int kbits = cast(int) log2(k) + 1; | |
writefln("benchmark: %d (bits: %d)", k, kbits); | |
auto bench = benchmark!( | |
{ doNotOptimizeAway({ | |
long bs; | |
auto rng = Mt19937(42); | |
foreach (_; 0..n) | |
bs += uniform(0, k, rng); | |
return bs; | |
}());}, | |
{ doNotOptimizeAway({ | |
import core.bitop; | |
long bs; | |
auto rng = Mt19937(42); | |
auto r = rng.bitwise; | |
foreach (_; 0..n) | |
{ | |
long t; | |
// opSlice doesn't work with take | |
foreach (i; 0..kbits) | |
{ | |
t &= r.front << i; | |
r.popFront; | |
} | |
bs += t; | |
} | |
return bs; | |
}());}, | |
{ doNotOptimizeAway({ | |
long bs; | |
auto rng = Mt19937(42); | |
foreach (_; 0..n) | |
bs += uniform(0, 255, rng); | |
return bs; | |
}());}, | |
)(1000); | |
string[] names = ["random.uniform", "random.bitwise", "random.uniform.fixed"]; | |
foreach(j,r;bench) | |
writefln("%-15s = %s", names[j], r.to!Duration); | |
} | |
// from PR 5002 | |
private struct Bitwise(R) | |
if (isInputRange!R && isIntegral!(ElementType!R)) | |
{ | |
private: | |
alias ElemType = ElementType!R; | |
alias UnsignedElemType = Unsigned!ElemType; | |
R parent; | |
enum bitsNum = ElemType.sizeof * 8; | |
size_t maskPos = 1; | |
static if (isBidirectionalRange!R) | |
{ | |
size_t backMaskPos = bitsNum; | |
} | |
public: | |
this()(auto ref R range) | |
{ | |
parent = range; | |
} | |
static if (isInfinite!R) | |
{ | |
enum empty = false; | |
} | |
else | |
{ | |
/** | |
* Check if the range is empty | |
* | |
* Returns: a boolean true or false | |
*/ | |
bool empty() | |
{ | |
static if (hasLength!R) | |
{ | |
return length == 0; | |
} | |
else static if (isBidirectionalRange!R) | |
{ | |
if (parent.empty) | |
{ | |
return true; | |
} | |
else | |
{ | |
/* | |
If we have consumed the last element of the range both from | |
the front and the back, then the masks positions will overlap | |
*/ | |
return parent.save.dropOne.empty && (maskPos > backMaskPos); | |
} | |
} | |
else | |
{ | |
/* | |
If we consumed the last element of the range, but not all the | |
bits in the last element | |
*/ | |
return parent.empty; | |
} | |
} | |
} | |
bool front() | |
{ | |
assert(!empty); | |
return (parent.front & mask(maskPos)) != 0; | |
} | |
void popFront() | |
{ | |
assert(!empty); | |
++maskPos; | |
if (maskPos > bitsNum) | |
{ | |
parent.popFront; | |
maskPos = 1; | |
} | |
} | |
static if (hasLength!R) | |
{ | |
size_t length() | |
{ | |
auto len = parent.length * bitsNum - (maskPos - 1); | |
static if (isBidirectionalRange!R) | |
{ | |
len -= bitsNum - backMaskPos; | |
} | |
return len; | |
} | |
alias opDollar = length; | |
} | |
static if (isForwardRange!R) | |
{ | |
typeof(this) save() | |
{ | |
auto result = this; | |
result.parent = parent.save; | |
return result; | |
} | |
} | |
static if (isBidirectionalRange!R) | |
{ | |
bool back() | |
{ | |
assert(!empty); | |
return (parent.back & mask(backMaskPos)) != 0; | |
} | |
void popBack() | |
{ | |
assert(!empty); | |
--backMaskPos; | |
if (backMaskPos == 0) | |
{ | |
parent.popBack; | |
backMaskPos = bitsNum; | |
} | |
} | |
} | |
static if (isRandomAccessRange!R) | |
{ | |
/** | |
Return the `n`th bit within the range | |
*/ | |
bool opIndex(size_t n) | |
in | |
{ | |
/* | |
If it does not have the length property, it means that R is | |
an infinite range | |
*/ | |
static if (hasLength!R) | |
{ | |
assert(n < length, "Index out of bounds"); | |
} | |
} | |
body | |
{ | |
immutable size_t remainingBits = bitsNum - maskPos + 1; | |
// If n >= maskPos, then the bit sign will be 1, otherwise 0 | |
immutable sizediff_t sign = (remainingBits - n - 1) >> (sizediff_t.sizeof * 8 - 1); | |
/* | |
By truncating n with remainingBits bits we have skipped the | |
remaining bits in parent[0], so we need to add 1 to elemIndex. | |
Because bitsNum is a power of 2, n / bitsNum == n >> bitsNum.bsf | |
*/ | |
import core.bitop : bsf; | |
immutable size_t elemIndex = sign * (((n - remainingBits) >> bitsNum.bsf) + 1); | |
/* | |
Since the indexing is from LSB to MSB, we need to index at the | |
remainder of (n - remainingBits). | |
Because bitsNum is a power of 2, n % bitsNum == n & (bitsNum - 1) | |
*/ | |
immutable size_t elemMaskPos = (sign ^ 1) * (maskPos + n) | |
+ sign * (1 + ((n - remainingBits) & (bitsNum - 1))); | |
return (parent[elemIndex] & mask(elemMaskPos)) != 0; | |
} | |
/** | |
Assigns `flag` to the `n`th bit within the range | |
*/ | |
void opIndexAssign(bool flag, size_t n) | |
in | |
{ | |
static if (hasLength!R) | |
{ | |
assert(n < length, "Index out of bounds"); | |
} | |
} | |
body | |
{ | |
import core.bitop : bsf; | |
immutable size_t remainingBits = bitsNum - maskPos + 1; | |
immutable sizediff_t sign = (remainingBits - n - 1) >> (sizediff_t.sizeof * 8 - 1); | |
immutable size_t elemIndex = sign * (((n - remainingBits) >> bitsNum.bsf) + 1); | |
immutable size_t elemMaskPos = (sign ^ 1) * (maskPos + n) | |
+ sign * (1 + ((n - remainingBits) & (bitsNum - 1))); | |
auto elem = parent[elemIndex]; | |
auto elemMask = mask(elemMaskPos); | |
parent[elemIndex] = cast(UnsignedElemType)(flag * (elem | elemMask) | |
+ (flag ^ 1) * (elem & ~elemMask)); | |
} | |
Bitwise!R opSlice() | |
{ | |
return this.save; | |
} | |
Bitwise!R opSlice(size_t start, size_t end) | |
in | |
{ | |
assert(start < end, "Invalid bounds: end <= start"); | |
} | |
body | |
{ | |
import core.bitop : bsf; | |
size_t remainingBits = bitsNum - maskPos + 1; | |
sizediff_t sign = (remainingBits - start - 1) >> (sizediff_t.sizeof * 8 - 1); | |
immutable size_t startElemIndex = sign * (((start - remainingBits) >> bitsNum.bsf) + 1); | |
immutable size_t startElemMaskPos = (sign ^ 1) * (maskPos + start) | |
+ sign * (1 + ((start - remainingBits) & (bitsNum - 1))); | |
immutable size_t sliceLen = end - start - 1; | |
remainingBits = bitsNum - startElemMaskPos + 1; | |
sign = (remainingBits - sliceLen - 1) >> (sizediff_t.sizeof * 8 - 1); | |
immutable size_t endElemIndex = startElemIndex | |
+ sign * (((sliceLen - remainingBits) >> bitsNum.bsf) + 1); | |
immutable size_t endElemMaskPos = (sign ^ 1) * (startElemMaskPos + sliceLen) | |
+ sign * (1 + ((sliceLen - remainingBits) & (bitsNum - 1))); | |
typeof(return) result; | |
// Get the slice to be returned from the parent | |
result.parent = (parent[startElemIndex .. endElemIndex + 1]).save; | |
result.maskPos = startElemMaskPos; | |
static if (isBidirectionalRange!R) | |
{ | |
result.backMaskPos = endElemMaskPos; | |
} | |
return result; | |
} | |
} | |
private: | |
auto mask(size_t maskPos) | |
{ | |
return (1UL << (maskPos - 1UL)); | |
} | |
} | |
/** | |
Bitwise adapter over an integral type range. Consumes the range elements bit by | |
bit, from the least significant bit to the most significant bit. | |
Params: | |
R = an integral input range to iterate over | |
Returns: | |
A `Bitwise` input range with propagated forward, bidirectional | |
and random access capabilities | |
*/ | |
auto bitwise(R)(auto ref R range) | |
if (isInputRange!R && isIntegral!(ElementType!R)) | |
{ | |
return Bitwise!R(range); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment