mburbea · February 24, 2019 12:51
diff --git a/Bits.cs b/Bits.cs
 namespace System { 
    // Little-Endian based approaches most likely do not work on big-endian hardware.
    // Code based on examples found in
    // Bit Twiddling hacks: 
    // https://graphics.stanford.edu/~seander/bithacks.html
    // Chess programming wiki:
    // https://chessprogramming.wikispaces.com/BitScan
    public static class Bits
    {

        public static int PopCount(ulong value)
        {
            // Use a SWAR technique to count the population in parrallel.
            // I have no idea about naming these constants.
            const ulong k1 = ~0ul / 3;
            const ulong k2 = ~0ul / 5;
            const ulong k4 = ~0ul / 17;
            const ulong kf = ~0ul / 255;

            value = value - ((value >> 1) & k1);
            value = (value & k2) + ((value >> 2) & k2);
            value = (value + (value >> 4)) & k4;
            value = (value * kf) >> 56; //
            return (int) value;
        }

        public static int PopCount(long value) => PopCount((ulong) value);

        // these might be faster due to the cheaper opcode encoding of constants.
        public static int PopCount(uint value)
        {
            const uint k1 = ~0u / 3;
            const uint k2 = ~0u / 5;
            const uint k4 = ~0u / 17;
            const uint kf = ~0u / 255;

            value = value - ((value >> 1) & k1);
            value = (value & k2) + ((value >> 2) & k2);
            value = (value + (value >> 4)) & k4;
            value = (value * kf) >> 24;
            return (int) value;
        }

        public static int PopCount(int value) => PopCount((uint) value);
        // CoreCLR optimizes these rotations into rol/ror opcodes:
        // See CoreCLR#1830
        public static ulong RotateLeft(ulong value, int offset)
        {
            return (value << (offset & 63)) | (value >> ((64 - offset) & 63));
        }

        public static ulong RotateLeft(uint value, int offset)
        {
            return (value << (offset & 31)) | (value >> ((32 - offset) & 31));
        }

        public static ulong RotateRight(ulong value, int offset)
        {
            return (value >> (offset & 63)) | (value << ((64 - offset) & 63));
        }

        public static ulong RotateRight(uint value, int offset)
        {
            return (value >> (offset & 31)) | (value << ((32 - offset) & 31));
        }

        // How this works::
        // In the count trailing zeros function we perform least significant bit (lsb) isolation.
        // Doing so we take our long value and convert it into something like (2^N), where n is the lsb. 
        // Multiplying anything by an integer than only has the n-th bit set is equivalent to
        // a left shift by n. This constant represents 64 overlapping 6 bit sequences (0-63) which are 
        // accessible by shifting, and then extracting the highest byte in the word. These unique values
        // essentially allow us to treat this as a minimal perfect hash into the table below to look up
        // the bit positions.
        // We don't use traditional lsb isolation (x & (x-1)) which just has the n-th bit set, but a form
        // that sets all lower bits as well. Thus we now have 2*(2^N)-1. For the most part this just means
        // we have to increase the value we right shift by one (e.g. instead of shifting by 57 we shift by 58).
        // This form is slightly faster on intel core architecture.
        // THe other reason to do this, is that we can reuse the same const and table for count leading zeroes.
        private const ulong Debruijn64 = 0x03F79D71B4CB0A89UL;

        private static readonly int[] Debruijn64Table =
        {
            0,  47, 1,  56, 48, 27, 2,  60,
            57, 49, 41, 37, 28, 16, 3,  61,
            54, 58, 35, 52, 50, 42, 21, 44,
            38, 32, 29, 23, 17, 11, 4,  62,
            46, 55, 26, 59, 40, 36, 15, 53,
            34, 51, 20, 43, 31, 22, 10, 45,
            25, 39, 14, 33, 19, 30, 9,  24,
            13, 18, 8,  12, 7,   6, 5,  63
        };

        private const uint Debruijn32 = 0x07C4ACDDU;

        private static readonly int[] Debruijn32Table =
        {
            0,  9,  1,  10, 13, 21, 2,  29,
            11, 14, 16, 18, 22, 25, 3,  30,
            8,  12, 20, 28, 15, 17, 24,  7,
            19, 27, 23,  6, 26,  5,  4, 31
        };

        // BitScanReverse/BitScanForward could be an interesting compliment to these methods as we then would not need
        // this ^ 63 or the check against zero (bitscans are undefined on zero).
        public static int CountLeadingZeroes(ulong value)
        {
            if (value == 0) return 64;
            value |= value >> 1;
            value |= value >> 2;
            value |= value >> 4;
            value |= value >> 8;
            value |= value >> 16;
            value |= value >> 32;
            return Debruijn64Table[(value * Debruijn64) >> 58] ^ 63;
        }

        public static int CountLeadingZeroes(long value) => CountLeadingZeroes((ulong) value);

        public static int CountLeadingZeroes(uint value)
        {
            if (value == 0) return 32;
            value |= value >> 1;
            value |= value >> 2;
            value |= value >> 4;
            value |= value >> 8;
            value |= value >> 16;
            return Debruijn32Table[(value * Debruijn32) >> 27] ^ 31;
        }

        public static int CountLeadingZeroes(int value) => CountLeadingZeroes((uint) value);

        public static int CountTrailingZeroes(ulong value)
        {
            if (value == 0) return 64;
            return Debruijn64Table[((value ^ (value - 1)) * Debruijn64) >> 58];
        }

        public static int CountTrailingZeroes(long value) => CountTrailingZeroes((ulong) value);

        public static int CountTrailingZeroes(uint value)
        {
            if (value == 0) return 32;
            return Debruijn32Table[((value ^ (value - 1)) * Debruijn32) >> 27];
        }

        public static int CountTrailingZeroes(int value) => CountTrailingZeroes((uint) value);

        public static bool ReadBit(ulong value, int offset)
        {
            return (value & (1ul << offset)) != 0;
        }

        public static bool ReadBit(long value, int offset) => ReadBit((ulong) value, offset);


        public static bool ReadBit(uint value, int offset)
        {
            return (value & (1u << offset)) != 0;
        }

        public static bool ReadBit(int value, int offset) => ReadBit((uint) value, offset);


        public static ulong WriteBit(ulong value, int offset, bool toSet)
        {
            return (value & ~(0x1ul << offset)) | ((toSet ? 1ul : 0ul) << offset);
        }

        public static long WriteBit(long value, int offset, bool toSet) => (long) WriteBit((ulong) value, offset, toSet);

        public static uint WriteBit(uint value, int offset, bool toSet)
        {
            return (value & ~(0x1u << offset)) | ((toSet ? 1u : 0u) << offset);
        }

        public static int WriteBit(int value, int offset, bool toSet) => (int) WriteBit((uint) value, offset, toSet);


        public static byte ReadByte(ulong value, int offset)
        {
            return (byte) ((value & (0xFFul << (offset * 8))) >> (offset * 8));
        }

        public static byte ReadByte(long value, int offset) => ReadByte((ulong) value, offset);

        public static byte ReadByte(uint value, int offset)
        {
            return (byte) ((value & (0xFFu << (offset * 8))) >> (offset * 8));
        }

        public static byte ReadByte(int value, int offset) => ReadByte((uint) value, offset);

        public static short ReadInt16(ulong value, int offset)
        {
            return (short) ((value & (0xFFFFul << (offset * 16))) >> (offset * 16));
        }

        public static short ReadInt16(long value, int offset) => ReadInt16((ulong) value, offset);

        public static short ReadInt16(uint value, int offset)
        {
            return (short) ((value & (0xFFFFu << (offset * 16))) >> (offset * 16));
        }

        public static short ReadInt16(int value, int offset) => ReadInt16((uint) value, offset);


        public static int ReadInt32(ulong value, int offset)
        {
            return (int) ((value & (~0u << (offset * 32))) >> (offset * 32));
        }

        public static int ReadInt32(long value, int offset) => ReadInt32((ulong) value, offset);


        public static ulong WriteByte(ulong value, int offset, byte toSet)
        {
            return (value & ~(0xFFul << (offset * 8))) | ((ulong) toSet << (offset * 8));
        }

        public static long WriteByte(long value, int offset, byte toSet)
            => (long) WriteByte((ulong) value, offset, toSet);

        public static uint WriteByte(uint value, int offset, byte toSet)
        {
            return (value & ~(0xFFu << (offset * 8))) | ((uint) toSet << (offset * 8));
        }

        public static int WriteByte(int value, int offset, byte toSet) => (int) WriteByte((uint) value, offset, toSet);

        // sign extension is the devil and screws this all up.
        // Essentially, when converting a smaller signed type to ulong, the highest bit is extended to
        // the remaining bits. For a positive value this works as expected, but for negative number
        // this has the unexpected result of setting all the higher bits, thus using something like
        // setInt16(value,0,unchecked((short)0x8000)) will result setting the value to 0xFFFFFFFFFF8000
        // to fix this we first convert to the unsigned of the same type.
        public static ulong WriteInt16(ulong value, int offset, short toSet)
        {
            return (value & ~(0xFFFFul << (offset * 16))) | ((ulong) (ushort) toSet << (offset * 16));
        }

        public static long WriteInt16(long value, int offset, short toSet)
            => (long) WriteInt16((ulong) value, offset, toSet);

        public static uint WriteInt16(uint value, int offset, short toSet)
        {
            return (value & ~(0xFFFFu << (offset * 16))) | ((uint) (ushort) toSet << (offset * 16));
        }

        public static int WriteInt16(int value, int offset, short toSet)
            => (int) WriteInt16((uint) value, offset, toSet);


        public static ulong WriteInt32(ulong value, int offset, int toSet)
        {
            return (value & ~(0xFFFFFFFFul << (offset * 32))) | ((ulong) (uint) toSet << (offset * 32));
        }

        public static long WriteInt32(long value, int offset, int toSet)
            => (long) WriteInt32((ulong) value, offset, toSet);
    }
 }
	namespace System {
	// Little-Endian based approaches most likely do not work on big-endian hardware.
	// Code based on examples found in
	// Bit Twiddling hacks:
	// https://graphics.stanford.edu/~seander/bithacks.html
	// Chess programming wiki:
	// https://chessprogramming.wikispaces.com/BitScan
	public static class Bits
	{

	public static int PopCount(ulong value)
	{
	// Use a SWAR technique to count the population in parrallel.
	// I have no idea about naming these constants.
	const ulong k1 = ~0ul / 3;
	const ulong k2 = ~0ul / 5;
	const ulong k4 = ~0ul / 17;
	const ulong kf = ~0ul / 255;

	value = value - ((value >> 1) & k1);
	value = (value & k2) + ((value >> 2) & k2);
	value = (value + (value >> 4)) & k4;
	value = (value * kf) >> 56; //
	return (int) value;
	}

	public static int PopCount(long value) => PopCount((ulong) value);

	// these might be faster due to the cheaper opcode encoding of constants.
	public static int PopCount(uint value)
	{
	const uint k1 = ~0u / 3;
	const uint k2 = ~0u / 5;
	const uint k4 = ~0u / 17;
	const uint kf = ~0u / 255;

	value = value - ((value >> 1) & k1);
	value = (value & k2) + ((value >> 2) & k2);
	value = (value + (value >> 4)) & k4;
	value = (value * kf) >> 24;
	return (int) value;
	}

	public static int PopCount(int value) => PopCount((uint) value);
	// CoreCLR optimizes these rotations into rol/ror opcodes:
	// See CoreCLR#1830
	public static ulong RotateLeft(ulong value, int offset)
	{
	return (value << (offset & 63)) \| (value >> ((64 - offset) & 63));
	}

	public static ulong RotateLeft(uint value, int offset)
	{
	return (value << (offset & 31)) \| (value >> ((32 - offset) & 31));
	}

	public static ulong RotateRight(ulong value, int offset)
	{
	return (value >> (offset & 63)) \| (value << ((64 - offset) & 63));
	}

	public static ulong RotateRight(uint value, int offset)
	{
	return (value >> (offset & 31)) \| (value << ((32 - offset) & 31));
	}

	// How this works::
	// In the count trailing zeros function we perform least significant bit (lsb) isolation.
	// Doing so we take our long value and convert it into something like (2^N), where n is the lsb.
	// Multiplying anything by an integer than only has the n-th bit set is equivalent to
	// a left shift by n. This constant represents 64 overlapping 6 bit sequences (0-63) which are
	// accessible by shifting, and then extracting the highest byte in the word. These unique values
	// essentially allow us to treat this as a minimal perfect hash into the table below to look up
	// the bit positions.
	// We don't use traditional lsb isolation (x & (x-1)) which just has the n-th bit set, but a form
	// that sets all lower bits as well. Thus we now have 2*(2^N)-1. For the most part this just means
	// we have to increase the value we right shift by one (e.g. instead of shifting by 57 we shift by 58).
	// This form is slightly faster on intel core architecture.
	// THe other reason to do this, is that we can reuse the same const and table for count leading zeroes.
	private const ulong Debruijn64 = 0x03F79D71B4CB0A89UL;

	private static readonly int[] Debruijn64Table =
	{
	0, 47, 1, 56, 48, 27, 2, 60,
	57, 49, 41, 37, 28, 16, 3, 61,
	54, 58, 35, 52, 50, 42, 21, 44,
	38, 32, 29, 23, 17, 11, 4, 62,
	46, 55, 26, 59, 40, 36, 15, 53,
	34, 51, 20, 43, 31, 22, 10, 45,
	25, 39, 14, 33, 19, 30, 9, 24,
	13, 18, 8, 12, 7, 6, 5, 63
	};

	private const uint Debruijn32 = 0x07C4ACDDU;

	private static readonly int[] Debruijn32Table =
	{
	0, 9, 1, 10, 13, 21, 2, 29,
	11, 14, 16, 18, 22, 25, 3, 30,
	8, 12, 20, 28, 15, 17, 24, 7,
	19, 27, 23, 6, 26, 5, 4, 31
	};

	// BitScanReverse/BitScanForward could be an interesting compliment to these methods as we then would not need
	// this ^ 63 or the check against zero (bitscans are undefined on zero).
	public static int CountLeadingZeroes(ulong value)
	{
	if (value == 0) return 64;
	value \|= value >> 1;
	value \|= value >> 2;
	value \|= value >> 4;
	value \|= value >> 8;
	value \|= value >> 16;
	value \|= value >> 32;
	return Debruijn64Table[(value * Debruijn64) >> 58] ^ 63;
	}

	public static int CountLeadingZeroes(long value) => CountLeadingZeroes((ulong) value);

	public static int CountLeadingZeroes(uint value)
	{
	if (value == 0) return 32;
	value \|= value >> 1;
	value \|= value >> 2;
	value \|= value >> 4;
	value \|= value >> 8;
	value \|= value >> 16;
	return Debruijn32Table[(value * Debruijn32) >> 27] ^ 31;
	}

	public static int CountLeadingZeroes(int value) => CountLeadingZeroes((uint) value);

	public static int CountTrailingZeroes(ulong value)
	{
	if (value == 0) return 64;
	return Debruijn64Table[((value ^ (value - 1)) * Debruijn64) >> 58];
	}

	public static int CountTrailingZeroes(long value) => CountTrailingZeroes((ulong) value);

	public static int CountTrailingZeroes(uint value)
	{
	if (value == 0) return 32;
	return Debruijn32Table[((value ^ (value - 1)) * Debruijn32) >> 27];
	}

	public static int CountTrailingZeroes(int value) => CountTrailingZeroes((uint) value);

	public static bool ReadBit(ulong value, int offset)
	{
	return (value & (1ul << offset)) != 0;
	}

	public static bool ReadBit(long value, int offset) => ReadBit((ulong) value, offset);


	public static bool ReadBit(uint value, int offset)
	{
	return (value & (1u << offset)) != 0;
	}

	public static bool ReadBit(int value, int offset) => ReadBit((uint) value, offset);


	public static ulong WriteBit(ulong value, int offset, bool toSet)
	{
	return (value & ~(0x1ul << offset)) \| ((toSet ? 1ul : 0ul) << offset);
	}

	public static long WriteBit(long value, int offset, bool toSet) => (long) WriteBit((ulong) value, offset, toSet);

	public static uint WriteBit(uint value, int offset, bool toSet)
	{
	return (value & ~(0x1u << offset)) \| ((toSet ? 1u : 0u) << offset);
	}

	public static int WriteBit(int value, int offset, bool toSet) => (int) WriteBit((uint) value, offset, toSet);


	public static byte ReadByte(ulong value, int offset)
	{
	return (byte) ((value & (0xFFul << (offset * 8))) >> (offset * 8));
	}

	public static byte ReadByte(long value, int offset) => ReadByte((ulong) value, offset);

	public static byte ReadByte(uint value, int offset)
	{
	return (byte) ((value & (0xFFu << (offset * 8))) >> (offset * 8));
	}

	public static byte ReadByte(int value, int offset) => ReadByte((uint) value, offset);

	public static short ReadInt16(ulong value, int offset)
	{
	return (short) ((value & (0xFFFFul << (offset * 16))) >> (offset * 16));
	}

	public static short ReadInt16(long value, int offset) => ReadInt16((ulong) value, offset);

	public static short ReadInt16(uint value, int offset)
	{
	return (short) ((value & (0xFFFFu << (offset * 16))) >> (offset * 16));
	}

	public static short ReadInt16(int value, int offset) => ReadInt16((uint) value, offset);


	public static int ReadInt32(ulong value, int offset)
	{
	return (int) ((value & (~0u << (offset * 32))) >> (offset * 32));
	}

	public static int ReadInt32(long value, int offset) => ReadInt32((ulong) value, offset);


	public static ulong WriteByte(ulong value, int offset, byte toSet)
	{
	return (value & ~(0xFFul << (offset * 8))) \| ((ulong) toSet << (offset * 8));
	}

	public static long WriteByte(long value, int offset, byte toSet)
	=> (long) WriteByte((ulong) value, offset, toSet);

	public static uint WriteByte(uint value, int offset, byte toSet)
	{
	return (value & ~(0xFFu << (offset * 8))) \| ((uint) toSet << (offset * 8));
	}

	public static int WriteByte(int value, int offset, byte toSet) => (int) WriteByte((uint) value, offset, toSet);

	// sign extension is the devil and screws this all up.
	// Essentially, when converting a smaller signed type to ulong, the highest bit is extended to
	// the remaining bits. For a positive value this works as expected, but for negative number
	// this has the unexpected result of setting all the higher bits, thus using something like
	// setInt16(value,0,unchecked((short)0x8000)) will result setting the value to 0xFFFFFFFFFF8000
	// to fix this we first convert to the unsigned of the same type.
	public static ulong WriteInt16(ulong value, int offset, short toSet)
	{
	return (value & ~(0xFFFFul << (offset * 16))) \| ((ulong) (ushort) toSet << (offset * 16));
	}

	public static long WriteInt16(long value, int offset, short toSet)
	=> (long) WriteInt16((ulong) value, offset, toSet);

	public static uint WriteInt16(uint value, int offset, short toSet)
	{
	return (value & ~(0xFFFFu << (offset * 16))) \| ((uint) (ushort) toSet << (offset * 16));
	}

	public static int WriteInt16(int value, int offset, short toSet)
	=> (int) WriteInt16((uint) value, offset, toSet);


	public static ulong WriteInt32(ulong value, int offset, int toSet)
	{
	return (value & ~(0xFFFFFFFFul << (offset * 32))) \| ((ulong) (uint) toSet << (offset * 32));
	}

	public static long WriteInt32(long value, int offset, int toSet)
	=> (long) WriteInt32((ulong) value, offset, toSet);
	}
	}