wilzbach · December 29, 2016 14:08
diff --git a/bitarray.d b/bitarray.d
 import std.algorithm, std.conv, std.datetime, std.functional, std.range, std.stdio, std.traits, std.typecons;

 private void doNotOptimizeAway(T)(auto ref T t)
 {
    import core.thread : getpid;
    import std.stdio : writeln;
    if(getpid() == 1) {
        writeln(*cast(char*)&t);
    }
 }

 import bitwise : bitwise, bitwise2BigEndian, bitwise2LittleEndian, Bitwise3, BitRange;

 void main()
 {
    auto n = 50_000;
    auto arr = n.iota!size_t.array;

    writeln("starting benchmark");
    auto bench = benchmark!(
        { doNotOptimizeAway(arr.bitwise.filter!(b => b > 0).sum);},
        { doNotOptimizeAway({
            long bs;
            foreach (b; arr.bitwise)
                bs += b;
            return bs;
        }());},
        { doNotOptimizeAway({
            long bs;
            foreach (b; arr.bitwise2BigEndian)
                bs += b;
            return bs;
        }());},
        { doNotOptimizeAway({
            long bs;
            foreach (b; arr.bitwise2LittleEndian)
                bs += b;
            return bs;
        }());},
        { doNotOptimizeAway({
            long bs;
            foreach(b; Bitwise3(&arr[0], size_t.sizeof * size_t.sizeof * arr.length))
                bs += b;
            return bs;
        }());},
        { doNotOptimizeAway({
            long bs;
            foreach(b; BitRange(&arr[0], size_t.sizeof * size_t.sizeof * arr.length))
                bs++;
            return bs;
        }());},
    )(1000);

    string[] names = ["bitwise.filter", "bitwise.foreach", "bitwise2Big.foreach", "bitwise2Little.foreach", "bitwise3.foreach","bitrange.foreach"];
    foreach(j,r;bench)
        writefln("%-15s = %s", names[j], r.to!Duration);
 }
diff --git a/bitwise.d b/bitwise.d
 import std.range, std.traits;
 import core.bitop;

 private struct Bitwise(R)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
 private:
    alias ElemType = ElementType!R;
    alias UnsignedElemType = Unsigned!ElemType;

    R parent;
    enum bitsNum = ElemType.sizeof * 8;
    size_t maskPos = bitsNum;

    static if (isBidirectionalRange!R)
    {
        size_t backMaskPos = 1;
    }

 public:
    this()(auto ref R range)
    {
        parent = range;
    }

    static if (isInfinite!R)
    {
        enum empty = false;
    }
    else
    {
        /**
         * Check if the range is empty
         *
         * Returns: a boolean true or false
         */
        bool empty()
        {
            static if (hasLength!R)
            {
                return length == 0;
            }
            else static if (isBidirectionalRange!R)
            {
                bool isOverlapping = parent.empty;
                if (!parent.empty)
                {
                    /*
                       If we have consumed the last element of the range both from
                       the front and the back, then the masks positions will overlap
                     */
                    R parentCopy = parent.save;
                    parentCopy.popFront;
                    isOverlapping = parentCopy.empty && (maskPos < backMaskPos);
                }
                return parent.empty || isOverlapping;
            }
            else
            {
                /*
                   If we consumed the last element of the range, but not all the
                   bits in the last element
                 */
                return parent.empty;
            }
        }
    }

    bool front()
    {
        assert(!empty);
        return (parent.front & mask(maskPos)) != 0;
    }

    void popFront()
    {
        --maskPos;
        if (maskPos == 0)
        {
            parent.popFront;
            maskPos = bitsNum;
        }
    }

    static if (hasLength!R)
    {
        size_t length()
        {
            auto len = parent.length * bitsNum - (bitsNum - maskPos);
            static if (isBidirectionalRange!R)
            {
                len -= (backMaskPos - 1);
            }
            return len;
        }

        alias opDollar = length;
    }

    static if (isForwardRange!R)
    {
        typeof(this) save()
        {
            auto result = this;
            result.parent = parent.save;
            return result;
        }
    }

    static if (isBidirectionalRange!R)
    {
        bool back()
        {
            assert(!empty);
            return (parent.back & mask(backMaskPos)) != 0;
        }

        void popBack()
        {
            assert(!empty);
            ++backMaskPos;
            if (backMaskPos > bitsNum)
            {
                parent.popBack;
                backMaskPos = 1;
            }
        }
    }

    static if (isRandomAccessRange!R)
    {
        /**
          Return the `n`th bit within the range
         */
        bool opIndex(size_t n)
        in
        {
            /*
               If it does not have the length property, it means that R is
               an infinite range
             */
            static if (hasLength!R)
            {
                assert(n < length, "Index out of bounds");
            }
        }
        body
        {
            // If n >= maskPos, then the bit sign will be 1, otherwise 0
            immutable sizediff_t sign = (maskPos - n - 1) >> (sizediff_t.sizeof * 8 - 1);
            /*
               By truncating n with maskPos bits we have skipped the remaining
               maskPos bits in parent[0], so we need to add 1 to elemIndex.

               Because bitsNum is a power of 2, n / bitsNum == n >> bitsNum.bsf
             */
            import core.bitop : bsf;
            immutable size_t elemIndex = sign * (((n - maskPos) >> bitsNum.bsf) + 1);

            /*
               Since the indexing is from MSB to LSB, we need to subtract the
               remainder from the number of bits that the element type has.

               Because bitsNum is a power of 2, n % bitsNum == n & (bitsNum - 1)
             */
            immutable size_t elemMaskPos = (sign ^ 1) * (maskPos - n)
                             + sign * (bitsNum - ((n - maskPos) & (bitsNum - 1)));

            return (parent[elemIndex] & mask(elemMaskPos)) != 0;
        }

        /**
          Assigns `flag` to the `n`th bit within the range
         */
        void opIndexAssign(bool flag, size_t n)
        in
        {
            static if (hasLength!R)
            {
                assert(n < length, "Index out of bounds");
            }
        }
        body
        {
            import core.bitop : bsf;

            immutable sizediff_t sign = (maskPos - n - 1) >> (sizediff_t.sizeof * 8 - 1);
            immutable size_t elemIndex = sign * (((n - maskPos) >> bitsNum.bsf) + 1);
            immutable size_t elemMaskPos = (sign ^ 1) * (maskPos - n)
                               + sign * (bitsNum - ((n - maskPos) & (bitsNum - 1)));

            auto elem = parent[elemIndex];
            auto elemMask = mask(elemMaskPos);
            parent[elemIndex] = cast(UnsignedElemType)(flag * (elem | elemMask)
                                                       + (flag ^ 1) * (elem & ~elemMask));
        }

        Bitwise!R opSlice()
        {
            return this.save;
        }

        Bitwise!R opSlice(size_t start, size_t end)
        in
        {
            assert(start < end, "Invalid bounds: end <= start");
        }
        body
        {
            import core.bitop : bsf;

            sizediff_t sign = (maskPos - start - 1) >> (sizediff_t.sizeof * 8 - 1);
            immutable size_t startElemIndex = sign * (((start - maskPos) >> bitsNum.bsf) + 1);
            immutable size_t startElemMaskPos = (sign ^ 1) * (maskPos - start)
                                              + sign * (bitsNum - ((start - maskPos) & (bitsNum - 1)));

            immutable size_t sliceLen = end - start - 1;
            sign = (startElemMaskPos - sliceLen - 1) >> (sizediff_t.sizeof * 8 - 1);
            immutable size_t endElemIndex = startElemIndex
                                          + sign * (((sliceLen - startElemMaskPos) >> bitsNum.bsf) + 1);
            immutable size_t endElemMaskPos = (sign ^ 1) * (startElemMaskPos - sliceLen)
                                            + sign * (bitsNum - ((sliceLen - startElemMaskPos) & (bitsNum - 1)));

            typeof(return) result;
            // Get the slice to be returned from the parent
            result.parent = (parent[startElemIndex .. endElemIndex + 1]).save;
            result.maskPos = startElemMaskPos;
            static if (isBidirectionalRange!R)
            {
                result.backMaskPos = endElemMaskPos;
            }
            return result;
        }
    }

 private:
    auto mask(size_t maskPos)
    {
        return (1UL << (maskPos - 1UL));
    }
 }

 /**
 Bitwise adapter over an integral type range. Consumes the range elements bit by bit.

 Params:
    R = an integral input range to iterate over

 Returns:
    A `Bitwise` input range with propagated forward, bidirectional
    and random access capabilities
 */
 auto bitwise(R)(auto ref R range)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
    return Bitwise!R(range);
 }

 private struct Bitwise2BigEndian(R)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
 private:
    alias ElemType = ElementType!R;
    alias UnsignedElemType = Unsigned!ElemType;

    R parent;
    enum bitsNum = ElemType.sizeof * 8;
    UnsignedElemType mask = 0;
    ElemType copy;

    import std.math : pow;
    enum startMask = UnsignedElemType.max / 2 + 1;

 public:
    this()(auto ref R range)
    {
        parent = range;
        // init
        copy = parent.front;
        parent.popFront;
        mask = startMask;
    }
    bool empty()
    {
        return mask == 0;
    }

    bool front()
    {
        assert(!empty);
        return (copy & mask) != 0;
    }

    void popFront()
    {
        if (mask == 1 && !parent.empty)
        {
            copy = parent.front;
            parent.popFront;
            mask = startMask;
        }
        else
        {
            mask >>= 1;
        }
    }
 }

 private struct Bitwise2LittleEndian(R)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
 private:
    alias ElemType = ElementType!R;
    alias UnsignedElemType = Unsigned!ElemType;

    R parent;
    enum bitsNum = ElemType.sizeof * 8;
    UnsignedElemType mask = 0;
    ElemType copy;

    enum endMask = UnsignedElemType(1) << (bitsNum - 1);

 public:
    this()(auto ref R range)
    {
        parent = range;
        // init
        copy = parent.front;
        parent.popFront;
        mask = 1;
    }
    bool empty()
    {
        return mask == 0;
    }

    bool front()
    {
        assert(!empty);
        return (copy & mask) != 0;
    }

    void popFront()
    {
        if (mask == endMask && !parent.empty)
        {
            copy = parent.front;
            parent.popFront;
            mask = 1;
        }
        else
        {
            mask <<= 1;
        }
    }
 }

 /**
 Bitwise adapter over an integral type range. Consumes the range elements bit by bit.

 Params:
    R = an integral input range to iterate over

 Returns:
    A `Bitwise` input range with propagated forward, bidirectional
    and random access capabilities
 */
 auto bitwise2BigEndian(R)(auto ref R range)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
    return Bitwise2BigEndian!R(range);
 }

 auto bitwise2LittleEndian(R)(auto ref R range)
    if (isInputRange!R && isIntegral!(ElementType!R))
 {
    return Bitwise2LittleEndian!R(range);
 }

 private struct Bitwise3
 {
 private:
    BitRange parent;
    size_t pos;

 public:
    this(const(size_t)* bitarr, size_t numbits) @system
    {
        parent = BitRange(bitarr, numbits);
    }

    bool empty()
    {
        return pos >= parent.len;
    }

    bool front()
    {
        if (pos == parent.idx)
            return true;
        else
            return false;
    }

    void popFront()
    {
        if (pos < parent.idx)
        {
            pos++;
            return;
        }

        if (!parent.empty)
        {
            parent.popFront;
        }
        pos++;
    }
 }

 unittest
 {
    import std.stdio;
    auto arr = [2UL, 3UL];
    //arr.bitwise2LittleEndian.writeln;
    //Bitwise3(&arr[0], 128).array.writeln;
 }

 unittest
 {
    import std.stdio;
    auto arr = [3UL];
    bt(arr.ptr, 1).writeln; // 1
    bts(arr.ptr, 2);
    arr[0].writeln; // 7

    arr = [3UL]; // reset
    auto r = arr.bitwise;
    r[1].writeln; // false
    r[2] = true;
    arr[0].writeln; // 2305843009213693955
 }

 /**
 * Range over bit set. Each element is the bit number that is set.
 *
 * This is more efficient than testing each bit in a sparsely populated bit
 * set. Note that the first bit in the bit set would be bit 0.
 */
 struct BitRange
 {
    /// Number of bits in each size_t
    enum bitsPerWord = size_t.sizeof * 8;

        const(size_t)*bits; // points at next word of bits to check
        size_t cur; // needed to allow searching bits using bsf
        size_t idx; // index of current set bit
        size_t len; // number of bits in the bit set.
    @nogc nothrow pure:

    /**
     * Construct a BitRange.
     *
     * Params:
     *   bitarr - The array of bits to iterate over
     *   numBits - The total number of valid bits in the given bit array
     */
    this(const(size_t)* bitarr, size_t numbits) @system
    {
        bits = bitarr;
        len = numbits;
        if (len)
        {
            // prime the first bit
            cur = *bits++ ^ 1;
            popFront();
        }
    }

    /// Range functions
    size_t front()
    {
        assert(!empty);
        return idx;
    }

    /// ditto
    bool empty() const
    {
        return idx >= len;
    }

    /// ditto
    void popFront() @system
    {
        // clear the current bit
        auto curbit = idx % bitsPerWord;
        cur ^= size_t(1) << curbit;
        if(!cur)
        {
            // find next size_t with set bit
            idx -= curbit;
            while (!cur)
            {
                if ((idx += bitsPerWord) >= len)
                    // now empty
                    return;
                cur = *bits++;
            }
            idx += bsf(cur);
        }
        else
        {
            idx += bsf(cur) - curbit;
        }
    }
 }
diff --git a/results b/results
 > dmd -O -release -boundscheck=off bitarray.d bitwise.d && ./bitarray

 bitwise.filter  = 1 minute, 2 secs, 925 ms, 145 μs, and 3 hnsecs
 bitwise.foreach = 38 secs, 534 ms, 801 μs, and 3 hnsecs
 bitwise2Big.foreach = 18 secs, 986 ms, 474 μs, and 6 hnsecs
 bitwise2Little.foreach = 20 secs, 882 ms, 840 μs, and 6 hnsecs
 bitwise3.foreach = 24 secs, 396 ms, 409 μs, and 7 hnsecs
 bitrange.foreach = 3 secs, 986 ms, 697 μs, and 9 hnsecs

 > ldc -O5 -release -boundscheck=off bitarray.d bitwise.d && ./bitarray
 bitwise.filter  = 29 secs, 259 ms, 840 μs, and 9 hnsecs
 bitwise.foreach = 24 secs, 668 ms, 420 μs, and 2 hnsecs
 bitwise2Big.foreach = 16 secs, 778 ms, 923 μs, and 5 hnsecs
 bitwise2Little.foreach = 16 secs, 849 ms, 760 μs, and 2 hnsecs
 bitwise3.foreach = 21 secs, 905 ms, 458 μs, and 8 hnsecs
 bitrange.foreach = 3 secs, 580 ms, and 864 μs
	import std.algorithm, std.conv, std.datetime, std.functional, std.range, std.stdio, std.traits, std.typecons;

	private void doNotOptimizeAway(T)(auto ref T t)
	{
	import core.thread : getpid;
	import std.stdio : writeln;
	if(getpid() == 1) {
	writeln(cast(char)&t);
	}
	}

	import bitwise : bitwise, bitwise2BigEndian, bitwise2LittleEndian, Bitwise3, BitRange;

	void main()
	{
	auto n = 50_000;
	auto arr = n.iota!size_t.array;

	writeln("starting benchmark");
	auto bench = benchmark!(
	{ doNotOptimizeAway(arr.bitwise.filter!(b => b > 0).sum);},
	{ doNotOptimizeAway({
	long bs;
	foreach (b; arr.bitwise)
	bs += b;
	return bs;
	}());},
	{ doNotOptimizeAway({
	long bs;
	foreach (b; arr.bitwise2BigEndian)
	bs += b;
	return bs;
	}());},
	{ doNotOptimizeAway({
	long bs;
	foreach (b; arr.bitwise2LittleEndian)
	bs += b;
	return bs;
	}());},
	{ doNotOptimizeAway({
	long bs;
	foreach(b; Bitwise3(&arr[0], size_t.sizeof * size_t.sizeof * arr.length))
	bs += b;
	return bs;
	}());},
	{ doNotOptimizeAway({
	long bs;
	foreach(b; BitRange(&arr[0], size_t.sizeof * size_t.sizeof * arr.length))
	bs++;
	return bs;
	}());},
	)(1000);

	string[] names = ["bitwise.filter", "bitwise.foreach", "bitwise2Big.foreach", "bitwise2Little.foreach", "bitwise3.foreach","bitrange.foreach"];
	foreach(j,r;bench)
	writefln("%-15s = %s", names[j], r.to!Duration);
	}
	import std.range, std.traits;
	import core.bitop;

	private struct Bitwise(R)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	private:
	alias ElemType = ElementType!R;
	alias UnsignedElemType = Unsigned!ElemType;

	R parent;
	enum bitsNum = ElemType.sizeof * 8;
	size_t maskPos = bitsNum;

	static if (isBidirectionalRange!R)
	{
	size_t backMaskPos = 1;
	}

	public:
	this()(auto ref R range)
	{
	parent = range;
	}

	static if (isInfinite!R)
	{
	enum empty = false;
	}
	else
	{
	/**
	* Check if the range is empty
	*
	* Returns: a boolean true or false
	*/
	bool empty()
	{
	static if (hasLength!R)
	{
	return length == 0;
	}
	else static if (isBidirectionalRange!R)
	{
	bool isOverlapping = parent.empty;
	if (!parent.empty)
	{
	/*
	If we have consumed the last element of the range both from
	the front and the back, then the masks positions will overlap
	*/
	R parentCopy = parent.save;
	parentCopy.popFront;
	isOverlapping = parentCopy.empty && (maskPos < backMaskPos);
	}
	return parent.empty \|\| isOverlapping;
	}
	else
	{
	/*
	If we consumed the last element of the range, but not all the
	bits in the last element
	*/
	return parent.empty;
	}
	}
	}

	bool front()
	{
	assert(!empty);
	return (parent.front & mask(maskPos)) != 0;
	}

	void popFront()
	{
	--maskPos;
	if (maskPos == 0)
	{
	parent.popFront;
	maskPos = bitsNum;
	}
	}

	static if (hasLength!R)
	{
	size_t length()
	{
	auto len = parent.length * bitsNum - (bitsNum - maskPos);
	static if (isBidirectionalRange!R)
	{
	len -= (backMaskPos - 1);
	}
	return len;
	}

	alias opDollar = length;
	}

	static if (isForwardRange!R)
	{
	typeof(this) save()
	{
	auto result = this;
	result.parent = parent.save;
	return result;
	}
	}

	static if (isBidirectionalRange!R)
	{
	bool back()
	{
	assert(!empty);
	return (parent.back & mask(backMaskPos)) != 0;
	}

	void popBack()
	{
	assert(!empty);
	++backMaskPos;
	if (backMaskPos > bitsNum)
	{
	parent.popBack;
	backMaskPos = 1;
	}
	}
	}

	static if (isRandomAccessRange!R)
	{
	/**
	Return the `n`th bit within the range
	*/
	bool opIndex(size_t n)
	in
	{
	/*
	If it does not have the length property, it means that R is
	an infinite range
	*/
	static if (hasLength!R)
	{
	assert(n < length, "Index out of bounds");
	}
	}
	body
	{
	// If n >= maskPos, then the bit sign will be 1, otherwise 0
	immutable sizediff_t sign = (maskPos - n - 1) >> (sizediff_t.sizeof * 8 - 1);
	/*
	By truncating n with maskPos bits we have skipped the remaining
	maskPos bits in parent[0], so we need to add 1 to elemIndex.

	Because bitsNum is a power of 2, n / bitsNum == n >> bitsNum.bsf
	*/
	import core.bitop : bsf;
	immutable size_t elemIndex = sign * (((n - maskPos) >> bitsNum.bsf) + 1);

	/*
	Since the indexing is from MSB to LSB, we need to subtract the
	remainder from the number of bits that the element type has.

	Because bitsNum is a power of 2, n % bitsNum == n & (bitsNum - 1)
	*/
	immutable size_t elemMaskPos = (sign ^ 1) * (maskPos - n)
	+ sign * (bitsNum - ((n - maskPos) & (bitsNum - 1)));

	return (parent[elemIndex] & mask(elemMaskPos)) != 0;
	}

	/**
	Assigns `flag` to the `n`th bit within the range
	*/
	void opIndexAssign(bool flag, size_t n)
	in
	{
	static if (hasLength!R)
	{
	assert(n < length, "Index out of bounds");
	}
	}
	body
	{
	import core.bitop : bsf;

	immutable sizediff_t sign = (maskPos - n - 1) >> (sizediff_t.sizeof * 8 - 1);
	immutable size_t elemIndex = sign * (((n - maskPos) >> bitsNum.bsf) + 1);
	immutable size_t elemMaskPos = (sign ^ 1) * (maskPos - n)
	+ sign * (bitsNum - ((n - maskPos) & (bitsNum - 1)));

	auto elem = parent[elemIndex];
	auto elemMask = mask(elemMaskPos);
	parent[elemIndex] = cast(UnsignedElemType)(flag * (elem \| elemMask)
	+ (flag ^ 1) * (elem & ~elemMask));
	}

	Bitwise!R opSlice()
	{
	return this.save;
	}

	Bitwise!R opSlice(size_t start, size_t end)
	in
	{
	assert(start < end, "Invalid bounds: end <= start");
	}
	body
	{
	import core.bitop : bsf;

	sizediff_t sign = (maskPos - start - 1) >> (sizediff_t.sizeof * 8 - 1);
	immutable size_t startElemIndex = sign * (((start - maskPos) >> bitsNum.bsf) + 1);
	immutable size_t startElemMaskPos = (sign ^ 1) * (maskPos - start)
	+ sign * (bitsNum - ((start - maskPos) & (bitsNum - 1)));

	immutable size_t sliceLen = end - start - 1;
	sign = (startElemMaskPos - sliceLen - 1) >> (sizediff_t.sizeof * 8 - 1);
	immutable size_t endElemIndex = startElemIndex
	+ sign * (((sliceLen - startElemMaskPos) >> bitsNum.bsf) + 1);
	immutable size_t endElemMaskPos = (sign ^ 1) * (startElemMaskPos - sliceLen)
	+ sign * (bitsNum - ((sliceLen - startElemMaskPos) & (bitsNum - 1)));

	typeof(return) result;
	// Get the slice to be returned from the parent
	result.parent = (parent[startElemIndex .. endElemIndex + 1]).save;
	result.maskPos = startElemMaskPos;
	static if (isBidirectionalRange!R)
	{
	result.backMaskPos = endElemMaskPos;
	}
	return result;
	}
	}

	private:
	auto mask(size_t maskPos)
	{
	return (1UL << (maskPos - 1UL));
	}
	}

	/**
	Bitwise adapter over an integral type range. Consumes the range elements bit by bit.

	Params:
	R = an integral input range to iterate over

	Returns:
	A `Bitwise` input range with propagated forward, bidirectional
	and random access capabilities
	*/
	auto bitwise(R)(auto ref R range)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	return Bitwise!R(range);
	}

	private struct Bitwise2BigEndian(R)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	private:
	alias ElemType = ElementType!R;
	alias UnsignedElemType = Unsigned!ElemType;

	R parent;
	enum bitsNum = ElemType.sizeof * 8;
	UnsignedElemType mask = 0;
	ElemType copy;

	import std.math : pow;
	enum startMask = UnsignedElemType.max / 2 + 1;

	public:
	this()(auto ref R range)
	{
	parent = range;
	// init
	copy = parent.front;
	parent.popFront;
	mask = startMask;
	}
	bool empty()
	{
	return mask == 0;
	}

	bool front()
	{
	assert(!empty);
	return (copy & mask) != 0;
	}

	void popFront()
	{
	if (mask == 1 && !parent.empty)
	{
	copy = parent.front;
	parent.popFront;
	mask = startMask;
	}
	else
	{
	mask >>= 1;
	}
	}
	}

	private struct Bitwise2LittleEndian(R)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	private:
	alias ElemType = ElementType!R;
	alias UnsignedElemType = Unsigned!ElemType;

	R parent;
	enum bitsNum = ElemType.sizeof * 8;
	UnsignedElemType mask = 0;
	ElemType copy;

	enum endMask = UnsignedElemType(1) << (bitsNum - 1);

	public:
	this()(auto ref R range)
	{
	parent = range;
	// init
	copy = parent.front;
	parent.popFront;
	mask = 1;
	}
	bool empty()
	{
	return mask == 0;
	}

	bool front()
	{
	assert(!empty);
	return (copy & mask) != 0;
	}

	void popFront()
	{
	if (mask == endMask && !parent.empty)
	{
	copy = parent.front;
	parent.popFront;
	mask = 1;
	}
	else
	{
	mask <<= 1;
	}
	}
	}

	/**
	Bitwise adapter over an integral type range. Consumes the range elements bit by bit.

	Params:
	R = an integral input range to iterate over

	Returns:
	A `Bitwise` input range with propagated forward, bidirectional
	and random access capabilities
	*/
	auto bitwise2BigEndian(R)(auto ref R range)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	return Bitwise2BigEndian!R(range);
	}

	auto bitwise2LittleEndian(R)(auto ref R range)
	if (isInputRange!R && isIntegral!(ElementType!R))
	{
	return Bitwise2LittleEndian!R(range);
	}

	private struct Bitwise3
	{
	private:
	BitRange parent;
	size_t pos;

	public:
	this(const(size_t)* bitarr, size_t numbits) @system
	{
	parent = BitRange(bitarr, numbits);
	}

	bool empty()
	{
	return pos >= parent.len;
	}

	bool front()
	{
	if (pos == parent.idx)
	return true;
	else
	return false;
	}

	void popFront()
	{
	if (pos < parent.idx)
	{
	pos++;
	return;
	}

	if (!parent.empty)
	{
	parent.popFront;
	}
	pos++;
	}
	}

	unittest
	{
	import std.stdio;
	auto arr = [2UL, 3UL];
	//arr.bitwise2LittleEndian.writeln;
	//Bitwise3(&arr[0], 128).array.writeln;
	}

	unittest
	{
	import std.stdio;
	auto arr = [3UL];
	bt(arr.ptr, 1).writeln; // 1
	bts(arr.ptr, 2);
	arr[0].writeln; // 7

	arr = [3UL]; // reset
	auto r = arr.bitwise;
	r[1].writeln; // false
	r[2] = true;
	arr[0].writeln; // 2305843009213693955
	}

	/**
	* Range over bit set. Each element is the bit number that is set.
	*
	* This is more efficient than testing each bit in a sparsely populated bit
	* set. Note that the first bit in the bit set would be bit 0.
	*/
	struct BitRange
	{
	/// Number of bits in each size_t
	enum bitsPerWord = size_t.sizeof * 8;

	const(size_t)*bits; // points at next word of bits to check
	size_t cur; // needed to allow searching bits using bsf
	size_t idx; // index of current set bit
	size_t len; // number of bits in the bit set.
	@nogc nothrow pure:

	/**
	* Construct a BitRange.
	*
	* Params:
	* bitarr - The array of bits to iterate over
	* numBits - The total number of valid bits in the given bit array
	*/
	this(const(size_t)* bitarr, size_t numbits) @system
	{
	bits = bitarr;
	len = numbits;
	if (len)
	{
	// prime the first bit
	cur = *bits++ ^ 1;
	popFront();
	}
	}

	/// Range functions
	size_t front()
	{
	assert(!empty);
	return idx;
	}

	/// ditto
	bool empty() const
	{
	return idx >= len;
	}

	/// ditto
	void popFront() @system
	{
	// clear the current bit
	auto curbit = idx % bitsPerWord;
	cur ^= size_t(1) << curbit;
	if(!cur)
	{
	// find next size_t with set bit
	idx -= curbit;
	while (!cur)
	{
	if ((idx += bitsPerWord) >= len)
	// now empty
	return;
	cur = *bits++;
	}
	idx += bsf(cur);
	}
	else
	{
	idx += bsf(cur) - curbit;
	}
	}
	}
	> dmd -O -release -boundscheck=off bitarray.d bitwise.d && ./bitarray

	bitwise.filter = 1 minute, 2 secs, 925 ms, 145 μs, and 3 hnsecs
	bitwise.foreach = 38 secs, 534 ms, 801 μs, and 3 hnsecs
	bitwise2Big.foreach = 18 secs, 986 ms, 474 μs, and 6 hnsecs
	bitwise2Little.foreach = 20 secs, 882 ms, 840 μs, and 6 hnsecs
	bitwise3.foreach = 24 secs, 396 ms, 409 μs, and 7 hnsecs
	bitrange.foreach = 3 secs, 986 ms, 697 μs, and 9 hnsecs

	> ldc -O5 -release -boundscheck=off bitarray.d bitwise.d && ./bitarray
	bitwise.filter = 29 secs, 259 ms, 840 μs, and 9 hnsecs
	bitwise.foreach = 24 secs, 668 ms, 420 μs, and 2 hnsecs
	bitwise2Big.foreach = 16 secs, 778 ms, 923 μs, and 5 hnsecs
	bitwise2Little.foreach = 16 secs, 849 ms, 760 μs, and 2 hnsecs
	bitwise3.foreach = 21 secs, 905 ms, 458 μs, and 8 hnsecs
	bitrange.foreach = 3 secs, 580 ms, and 864 μs