/*
 * Loop unrolling and code generation with self inclusion and preprocessing.
 *
 * Source: GitHub gist gpakosz/6146459, created August 3, 2013 13:31.
 */
#include "PackedArray.h" | |
#include <assert.h> | |
/*
 * Packs `count` values from `in` into the 32-bit word stream `out`, using
 * 1 bit per value, starting at item index `offset` (bit `offset * 1` of the
 * stream).
 *
 * - Bits of the first and last touched words that lie outside the written
 *   range are preserved.
 * - Values are OR-ed in without masking, so every input must already fit in
 *   1 bit; larger values spill into neighbouring items.
 * - `count == 0` is a no-op.
 *
 * The body is fully unrolled: `offset % 32` selects the entry point and every
 * case intentionally falls through to the next one.
 */
void __PackedArray_pack_1(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t endBit;
  uint32_t packed;
  const uint32_t* end;

  /* The unrolled tail below always consumes at least one item. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * 1u) / 32;
  startBit = (uint32_t)(((uint64_t)offset * 1u) % 32);
  /* Keep the bits of the first word that sit below the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);

  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of times slot 31 is reached; computed unsigned/64-bit to avoid overflow. */
    uint32_t n = (uint32_t)(((uint64_t)count + offset) / 32);
    /* Items left over for the checked tail once the last full row is done. */
    count = (count + offset) % 32;
    switch (offset)
    {
      do
      {
      case 0:  packed |= *in++ << 0;
      case 1:  packed |= *in++ << 1;
      case 2:  packed |= *in++ << 2;
      case 3:  packed |= *in++ << 3;
      case 4:  packed |= *in++ << 4;
      case 5:  packed |= *in++ << 5;
      case 6:  packed |= *in++ << 6;
      case 7:  packed |= *in++ << 7;
      case 8:  packed |= *in++ << 8;
      case 9:  packed |= *in++ << 9;
      case 10: packed |= *in++ << 10;
      case 11: packed |= *in++ << 11;
      case 12: packed |= *in++ << 12;
      case 13: packed |= *in++ << 13;
      case 14: packed |= *in++ << 14;
      case 15: packed |= *in++ << 15;
      case 16: packed |= *in++ << 16;
      case 17: packed |= *in++ << 17;
      case 18: packed |= *in++ << 18;
      case 19: packed |= *in++ << 19;
      case 20: packed |= *in++ << 20;
      case 21: packed |= *in++ << 21;
      case 22: packed |= *in++ << 22;
      case 23: packed |= *in++ << 23;
      case 24: packed |= *in++ << 24;
      case 25: packed |= *in++ << 25;
      case 26: packed |= *in++ << 26;
      case 27: packed |= *in++ << 27;
      case 28: packed |= *in++ << 28;
      case 29: packed |= *in++ << 29;
      case 30: packed |= *in++ << 30;
      case 31: packed |= *in++ << 31;
        /* Word complete: flush it and start a fresh one. */
        *out++ = packed;
        packed = 0;
      } while (--n != 0);
    }
    if (count == 0)
      return;
    offset = 0;
    startBit = 0;
  }

  /* Checked tail: fewer than 32 items remain, stop as soon as the input is exhausted. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << 0;  if (in == end) break;
    case 1:  packed |= *in++ << 1;  if (in == end) break;
    case 2:  packed |= *in++ << 2;  if (in == end) break;
    case 3:  packed |= *in++ << 3;  if (in == end) break;
    case 4:  packed |= *in++ << 4;  if (in == end) break;
    case 5:  packed |= *in++ << 5;  if (in == end) break;
    case 6:  packed |= *in++ << 6;  if (in == end) break;
    case 7:  packed |= *in++ << 7;  if (in == end) break;
    case 8:  packed |= *in++ << 8;  if (in == end) break;
    case 9:  packed |= *in++ << 9;  if (in == end) break;
    case 10: packed |= *in++ << 10; if (in == end) break;
    case 11: packed |= *in++ << 11; if (in == end) break;
    case 12: packed |= *in++ << 12; if (in == end) break;
    case 13: packed |= *in++ << 13; if (in == end) break;
    case 14: packed |= *in++ << 14; if (in == end) break;
    case 15: packed |= *in++ << 15; if (in == end) break;
    case 16: packed |= *in++ << 16; if (in == end) break;
    case 17: packed |= *in++ << 17; if (in == end) break;
    case 18: packed |= *in++ << 18; if (in == end) break;
    case 19: packed |= *in++ << 19; if (in == end) break;
    case 20: packed |= *in++ << 20; if (in == end) break;
    case 21: packed |= *in++ << 21; if (in == end) break;
    case 22: packed |= *in++ << 22; if (in == end) break;
    case 23: packed |= *in++ << 23; if (in == end) break;
    case 24: packed |= *in++ << 24; if (in == end) break;
    case 25: packed |= *in++ << 25; if (in == end) break;
    case 26: packed |= *in++ << 26; if (in == end) break;
    case 27: packed |= *in++ << 27; if (in == end) break;
    case 28: packed |= *in++ << 28; if (in == end) break;
    case 29: packed |= *in++ << 29; if (in == end) break;
    case 30: packed |= *in++ << 30; if (in == end) break;
    case 31: packed |= *in++ << 31;
      *out++ = packed;
      packed = 0;
      break;
  }
  assert(in == end);

  /* Partially filled last word: merge with the destination bits above the last item. */
  endBit = (count * 1u + startBit) % 32;
  if (endBit != 0)
  {
    packed |= *out & ~(((uint32_t)1 << endBit) - 1);
    *out = packed;
  }
}
/*
 * Extracts `count` 1-bit values from the 32-bit word stream `in`, starting at
 * item index `offset` (bit `offset * 1` of the stream), and stores each one
 * in its own element of `out`.
 *
 * - `count == 0` is a no-op (nothing is read or written).
 * - A following input word is only loaded when another item still needs it.
 *
 * The body is fully unrolled: `offset % 32` selects the entry point and every
 * case intentionally falls through to the next one.
 */
void __PackedArray_unpack_1(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* end;

  /* The unrolled tail below always produces at least one item. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * 1u) / 32;
  packed = *in;

  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of times slot 31 is reached; computed unsigned/64-bit to avoid overflow. */
    uint32_t n = (uint32_t)(((uint64_t)count + offset) / 32);
    /* Items left over for the checked tail once the last full row is done. */
    count = (count + offset) % 32;
    switch (offset)
    {
      do
      {
        /* Skipped on first entry (the switch jumps past it); loads the next word on every later pass. */
        packed = *++in;
      case 0:  *out++ = (packed >> 0) & 1u;
      case 1:  *out++ = (packed >> 1) & 1u;
      case 2:  *out++ = (packed >> 2) & 1u;
      case 3:  *out++ = (packed >> 3) & 1u;
      case 4:  *out++ = (packed >> 4) & 1u;
      case 5:  *out++ = (packed >> 5) & 1u;
      case 6:  *out++ = (packed >> 6) & 1u;
      case 7:  *out++ = (packed >> 7) & 1u;
      case 8:  *out++ = (packed >> 8) & 1u;
      case 9:  *out++ = (packed >> 9) & 1u;
      case 10: *out++ = (packed >> 10) & 1u;
      case 11: *out++ = (packed >> 11) & 1u;
      case 12: *out++ = (packed >> 12) & 1u;
      case 13: *out++ = (packed >> 13) & 1u;
      case 14: *out++ = (packed >> 14) & 1u;
      case 15: *out++ = (packed >> 15) & 1u;
      case 16: *out++ = (packed >> 16) & 1u;
      case 17: *out++ = (packed >> 17) & 1u;
      case 18: *out++ = (packed >> 18) & 1u;
      case 19: *out++ = (packed >> 19) & 1u;
      case 20: *out++ = (packed >> 20) & 1u;
      case 21: *out++ = (packed >> 21) & 1u;
      case 22: *out++ = (packed >> 22) & 1u;
      case 23: *out++ = (packed >> 23) & 1u;
      case 24: *out++ = (packed >> 24) & 1u;
      case 25: *out++ = (packed >> 25) & 1u;
      case 26: *out++ = (packed >> 26) & 1u;
      case 27: *out++ = (packed >> 27) & 1u;
      case 28: *out++ = (packed >> 28) & 1u;
      case 29: *out++ = (packed >> 29) & 1u;
      case 30: *out++ = (packed >> 30) & 1u;
      case 31: *out++ = (packed >> 31) & 1u;
      } while (--n != 0);
    }
    if (count == 0)
      return;
    packed = *++in;
    offset = 0;
  }

  /* Checked tail: fewer than 32 items remain, stop as soon as the output is full. */
  end = out + count;
  switch (offset)
  {
    case 0:  *out++ = (packed >> 0) & 1u;  if (out == end) break;
    case 1:  *out++ = (packed >> 1) & 1u;  if (out == end) break;
    case 2:  *out++ = (packed >> 2) & 1u;  if (out == end) break;
    case 3:  *out++ = (packed >> 3) & 1u;  if (out == end) break;
    case 4:  *out++ = (packed >> 4) & 1u;  if (out == end) break;
    case 5:  *out++ = (packed >> 5) & 1u;  if (out == end) break;
    case 6:  *out++ = (packed >> 6) & 1u;  if (out == end) break;
    case 7:  *out++ = (packed >> 7) & 1u;  if (out == end) break;
    case 8:  *out++ = (packed >> 8) & 1u;  if (out == end) break;
    case 9:  *out++ = (packed >> 9) & 1u;  if (out == end) break;
    case 10: *out++ = (packed >> 10) & 1u; if (out == end) break;
    case 11: *out++ = (packed >> 11) & 1u; if (out == end) break;
    case 12: *out++ = (packed >> 12) & 1u; if (out == end) break;
    case 13: *out++ = (packed >> 13) & 1u; if (out == end) break;
    case 14: *out++ = (packed >> 14) & 1u; if (out == end) break;
    case 15: *out++ = (packed >> 15) & 1u; if (out == end) break;
    case 16: *out++ = (packed >> 16) & 1u; if (out == end) break;
    case 17: *out++ = (packed >> 17) & 1u; if (out == end) break;
    case 18: *out++ = (packed >> 18) & 1u; if (out == end) break;
    case 19: *out++ = (packed >> 19) & 1u; if (out == end) break;
    case 20: *out++ = (packed >> 20) & 1u; if (out == end) break;
    case 21: *out++ = (packed >> 21) & 1u; if (out == end) break;
    case 22: *out++ = (packed >> 22) & 1u; if (out == end) break;
    case 23: *out++ = (packed >> 23) & 1u; if (out == end) break;
    case 24: *out++ = (packed >> 24) & 1u; if (out == end) break;
    case 25: *out++ = (packed >> 25) & 1u; if (out == end) break;
    case 26: *out++ = (packed >> 26) & 1u; if (out == end) break;
    case 27: *out++ = (packed >> 27) & 1u; if (out == end) break;
    case 28: *out++ = (packed >> 28) & 1u; if (out == end) break;
    case 29: *out++ = (packed >> 29) & 1u; if (out == end) break;
    case 30: *out++ = (packed >> 30) & 1u; if (out == end) break;
    case 31: *out++ = (packed >> 31) & 1u;
      break;
  }
  assert(out == end);
}
/*
 * Packs `count` values from `in` into the 32-bit word stream `out`, using
 * 2 bits per value, starting at item index `offset` (bit `offset * 2` of the
 * stream).
 *
 * - Bits of the first and last touched words that lie outside the written
 *   range are preserved.
 * - Values are OR-ed in without masking, so every input must already fit in
 *   2 bits; larger values spill into neighbouring items.
 * - `count == 0` is a no-op.
 *
 * The body is fully unrolled over a row of 32 items (two output words):
 * `offset % 32` selects the entry point and every case intentionally falls
 * through to the next one.
 */
void __PackedArray_pack_2(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t endBit;
  uint32_t packed;
  const uint32_t* end;

  /* The unrolled tail below always consumes at least one item. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * 2u) / 32;
  startBit = (uint32_t)(((uint64_t)offset * 2u) % 32);
  /* Keep the bits of the first word that sit below the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);

  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of times slot 31 is reached; computed unsigned/64-bit to avoid overflow. */
    uint32_t n = (uint32_t)(((uint64_t)count + offset) / 32);
    /* Items left over for the checked tail once the last full row is done. */
    count = (count + offset) % 32;
    switch (offset)
    {
      do
      {
      case 0:  packed |= *in++ << 0;
      case 1:  packed |= *in++ << 2;
      case 2:  packed |= *in++ << 4;
      case 3:  packed |= *in++ << 6;
      case 4:  packed |= *in++ << 8;
      case 5:  packed |= *in++ << 10;
      case 6:  packed |= *in++ << 12;
      case 7:  packed |= *in++ << 14;
      case 8:  packed |= *in++ << 16;
      case 9:  packed |= *in++ << 18;
      case 10: packed |= *in++ << 20;
      case 11: packed |= *in++ << 22;
      case 12: packed |= *in++ << 24;
      case 13: packed |= *in++ << 26;
      case 14: packed |= *in++ << 28;
      case 15: packed |= *in++ << 30;
        /* First word of the row complete. */
        *out++ = packed;
        packed = 0;
      case 16: packed |= *in++ << 0;
      case 17: packed |= *in++ << 2;
      case 18: packed |= *in++ << 4;
      case 19: packed |= *in++ << 6;
      case 20: packed |= *in++ << 8;
      case 21: packed |= *in++ << 10;
      case 22: packed |= *in++ << 12;
      case 23: packed |= *in++ << 14;
      case 24: packed |= *in++ << 16;
      case 25: packed |= *in++ << 18;
      case 26: packed |= *in++ << 20;
      case 27: packed |= *in++ << 22;
      case 28: packed |= *in++ << 24;
      case 29: packed |= *in++ << 26;
      case 30: packed |= *in++ << 28;
      case 31: packed |= *in++ << 30;
        /* Second word of the row complete. */
        *out++ = packed;
        packed = 0;
      } while (--n != 0);
    }
    if (count == 0)
      return;
    offset = 0;
    startBit = 0;
  }

  /* Checked tail: fewer than 32 items remain, stop as soon as the input is exhausted. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << 0;  if (in == end) break;
    case 1:  packed |= *in++ << 2;  if (in == end) break;
    case 2:  packed |= *in++ << 4;  if (in == end) break;
    case 3:  packed |= *in++ << 6;  if (in == end) break;
    case 4:  packed |= *in++ << 8;  if (in == end) break;
    case 5:  packed |= *in++ << 10; if (in == end) break;
    case 6:  packed |= *in++ << 12; if (in == end) break;
    case 7:  packed |= *in++ << 14; if (in == end) break;
    case 8:  packed |= *in++ << 16; if (in == end) break;
    case 9:  packed |= *in++ << 18; if (in == end) break;
    case 10: packed |= *in++ << 20; if (in == end) break;
    case 11: packed |= *in++ << 22; if (in == end) break;
    case 12: packed |= *in++ << 24; if (in == end) break;
    case 13: packed |= *in++ << 26; if (in == end) break;
    case 14: packed |= *in++ << 28; if (in == end) break;
    case 15: packed |= *in++ << 30;
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16: packed |= *in++ << 0;  if (in == end) break;
    case 17: packed |= *in++ << 2;  if (in == end) break;
    case 18: packed |= *in++ << 4;  if (in == end) break;
    case 19: packed |= *in++ << 6;  if (in == end) break;
    case 20: packed |= *in++ << 8;  if (in == end) break;
    case 21: packed |= *in++ << 10; if (in == end) break;
    case 22: packed |= *in++ << 12; if (in == end) break;
    case 23: packed |= *in++ << 14; if (in == end) break;
    case 24: packed |= *in++ << 16; if (in == end) break;
    case 25: packed |= *in++ << 18; if (in == end) break;
    case 26: packed |= *in++ << 20; if (in == end) break;
    case 27: packed |= *in++ << 22; if (in == end) break;
    case 28: packed |= *in++ << 24; if (in == end) break;
    case 29: packed |= *in++ << 26; if (in == end) break;
    case 30: packed |= *in++ << 28; if (in == end) break;
    case 31: packed |= *in++ << 30;
      *out++ = packed;
      packed = 0;
      break;
  }
  assert(in == end);

  /* Partially filled last word: merge with the destination bits above the last item. */
  endBit = (count * 2u + startBit) % 32;
  if (endBit != 0)
  {
    packed |= *out & ~(((uint32_t)1 << endBit) - 1);
    *out = packed;
  }
}
/*
 * Extracts `count` 2-bit values from the 32-bit word stream `in`, starting at
 * item index `offset` (bit `offset * 2` of the stream), and stores each one
 * in its own element of `out`.
 *
 * - `count == 0` is a no-op (nothing is read or written).
 * - A following input word is only loaded when another item still needs it.
 *
 * The body is fully unrolled over a row of 32 items (two input words):
 * `offset % 32` selects the entry point and every case intentionally falls
 * through to the next one.
 */
void __PackedArray_unpack_2(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* end;

  /* The unrolled tail below always produces at least one item. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * 2u) / 32;
  packed = *in;

  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of times slot 31 is reached; computed unsigned/64-bit to avoid overflow. */
    uint32_t n = (uint32_t)(((uint64_t)count + offset) / 32);
    /* Items left over for the checked tail once the last full row is done. */
    count = (count + offset) % 32;
    switch (offset)
    {
      do
      {
        /* Skipped on first entry (the switch jumps past it); loads the next word on every later pass. */
        packed = *++in;
      case 0:  *out++ = (packed >> 0) & 3u;
      case 1:  *out++ = (packed >> 2) & 3u;
      case 2:  *out++ = (packed >> 4) & 3u;
      case 3:  *out++ = (packed >> 6) & 3u;
      case 4:  *out++ = (packed >> 8) & 3u;
      case 5:  *out++ = (packed >> 10) & 3u;
      case 6:  *out++ = (packed >> 12) & 3u;
      case 7:  *out++ = (packed >> 14) & 3u;
      case 8:  *out++ = (packed >> 16) & 3u;
      case 9:  *out++ = (packed >> 18) & 3u;
      case 10: *out++ = (packed >> 20) & 3u;
      case 11: *out++ = (packed >> 22) & 3u;
      case 12: *out++ = (packed >> 24) & 3u;
      case 13: *out++ = (packed >> 26) & 3u;
      case 14: *out++ = (packed >> 28) & 3u;
      case 15: *out++ = (packed >> 30) & 3u;
        /* Slot 16 always follows inside the loop, so the second word of the row is needed. */
        packed = *++in;
      case 16: *out++ = (packed >> 0) & 3u;
      case 17: *out++ = (packed >> 2) & 3u;
      case 18: *out++ = (packed >> 4) & 3u;
      case 19: *out++ = (packed >> 6) & 3u;
      case 20: *out++ = (packed >> 8) & 3u;
      case 21: *out++ = (packed >> 10) & 3u;
      case 22: *out++ = (packed >> 12) & 3u;
      case 23: *out++ = (packed >> 14) & 3u;
      case 24: *out++ = (packed >> 16) & 3u;
      case 25: *out++ = (packed >> 18) & 3u;
      case 26: *out++ = (packed >> 20) & 3u;
      case 27: *out++ = (packed >> 22) & 3u;
      case 28: *out++ = (packed >> 24) & 3u;
      case 29: *out++ = (packed >> 26) & 3u;
      case 30: *out++ = (packed >> 28) & 3u;
      case 31: *out++ = (packed >> 30) & 3u;
      } while (--n != 0);
    }
    if (count == 0)
      return;
    packed = *++in;
    offset = 0;
  }

  /* Checked tail: fewer than 32 items remain, stop as soon as the output is full. */
  end = out + count;
  switch (offset)
  {
    case 0:  *out++ = (packed >> 0) & 3u;  if (out == end) break;
    case 1:  *out++ = (packed >> 2) & 3u;  if (out == end) break;
    case 2:  *out++ = (packed >> 4) & 3u;  if (out == end) break;
    case 3:  *out++ = (packed >> 6) & 3u;  if (out == end) break;
    case 4:  *out++ = (packed >> 8) & 3u;  if (out == end) break;
    case 5:  *out++ = (packed >> 10) & 3u; if (out == end) break;
    case 6:  *out++ = (packed >> 12) & 3u; if (out == end) break;
    case 7:  *out++ = (packed >> 14) & 3u; if (out == end) break;
    case 8:  *out++ = (packed >> 16) & 3u; if (out == end) break;
    case 9:  *out++ = (packed >> 18) & 3u; if (out == end) break;
    case 10: *out++ = (packed >> 20) & 3u; if (out == end) break;
    case 11: *out++ = (packed >> 22) & 3u; if (out == end) break;
    case 12: *out++ = (packed >> 24) & 3u; if (out == end) break;
    case 13: *out++ = (packed >> 26) & 3u; if (out == end) break;
    case 14: *out++ = (packed >> 28) & 3u; if (out == end) break;
    case 15: *out++ = (packed >> 30) & 3u; if (out == end) break;
      /* Only reached when more items follow, so the next word exists. */
      packed = *++in;
    case 16: *out++ = (packed >> 0) & 3u;  if (out == end) break;
    case 17: *out++ = (packed >> 2) & 3u;  if (out == end) break;
    case 18: *out++ = (packed >> 4) & 3u;  if (out == end) break;
    case 19: *out++ = (packed >> 6) & 3u;  if (out == end) break;
    case 20: *out++ = (packed >> 8) & 3u;  if (out == end) break;
    case 21: *out++ = (packed >> 10) & 3u; if (out == end) break;
    case 22: *out++ = (packed >> 12) & 3u; if (out == end) break;
    case 23: *out++ = (packed >> 14) & 3u; if (out == end) break;
    case 24: *out++ = (packed >> 16) & 3u; if (out == end) break;
    case 25: *out++ = (packed >> 18) & 3u; if (out == end) break;
    case 26: *out++ = (packed >> 20) & 3u; if (out == end) break;
    case 27: *out++ = (packed >> 22) & 3u; if (out == end) break;
    case 28: *out++ = (packed >> 24) & 3u; if (out == end) break;
    case 29: *out++ = (packed >> 26) & 3u; if (out == end) break;
    case 30: *out++ = (packed >> 28) & 3u; if (out == end) break;
    case 31: *out++ = (packed >> 30) & 3u;
      break;
  }
  assert(out == end);
}
/*
 * Packs `count` values from `in` into the 32-bit word stream `out`, using
 * 3 bits per value, starting at item index `offset` (bit `offset * 3` of the
 * stream).
 *
 * - Bits of the first and last touched words that lie outside the written
 *   range are preserved.
 * - Values are OR-ed in without masking, so every input must already fit in
 *   3 bits; larger values spill into neighbouring items.
 * - `count == 0` is a no-op.
 *
 * The body is fully unrolled over a row of 32 items (three output words):
 * `offset % 32` selects the entry point and every case intentionally falls
 * through to the next one. Items 10 and 21 of a row straddle a word boundary:
 * their low bits finish the current word and their high bits seed the next.
 */
void __PackedArray_pack_3(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t endBit;
  uint32_t packed;
  const uint32_t* end;

  /* The unrolled tail below always consumes at least one item. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * 3u) / 32;
  startBit = (uint32_t)(((uint64_t)offset * 3u) % 32);
  /* Keep the bits of the first word that sit below the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);

  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of times slot 31 is reached; computed unsigned/64-bit to avoid overflow. */
    uint32_t n = (uint32_t)(((uint64_t)count + offset) / 32);
    /* Items left over for the checked tail once the last full row is done. */
    count = (count + offset) % 32;
    switch (offset)
    {
      do
      {
      case 0:  packed |= *in++ << 0;
      case 1:  packed |= *in++ << 3;
      case 2:  packed |= *in++ << 6;
      case 3:  packed |= *in++ << 9;
      case 4:  packed |= *in++ << 12;
      case 5:  packed |= *in++ << 15;
      case 6:  packed |= *in++ << 18;
      case 7:  packed |= *in++ << 21;
      case 8:  packed |= *in++ << 24;
      case 9:  packed |= *in++ << 27;
      case 10:
        /* Straddles words: 2 low bits here, 1 high bit in the next word. */
        packed |= *in << 30;
        *out++ = packed;
        packed = *in++ >> 2;
      case 11: packed |= *in++ << 1;
      case 12: packed |= *in++ << 4;
      case 13: packed |= *in++ << 7;
      case 14: packed |= *in++ << 10;
      case 15: packed |= *in++ << 13;
      case 16: packed |= *in++ << 16;
      case 17: packed |= *in++ << 19;
      case 18: packed |= *in++ << 22;
      case 19: packed |= *in++ << 25;
      case 20: packed |= *in++ << 28;
      case 21:
        /* Straddles words: 1 low bit here, 2 high bits in the next word. */
        packed |= *in << 31;
        *out++ = packed;
        packed = *in++ >> 1;
      case 22: packed |= *in++ << 2;
      case 23: packed |= *in++ << 5;
      case 24: packed |= *in++ << 8;
      case 25: packed |= *in++ << 11;
      case 26: packed |= *in++ << 14;
      case 27: packed |= *in++ << 17;
      case 28: packed |= *in++ << 20;
      case 29: packed |= *in++ << 23;
      case 30: packed |= *in++ << 26;
      case 31: packed |= *in++ << 29;
        /* Third word of the row complete. */
        *out++ = packed;
        packed = 0;
      } while (--n != 0);
    }
    if (count == 0)
      return;
    offset = 0;
    startBit = 0;
  }

  /* Checked tail: fewer than 32 items remain, stop as soon as the input is exhausted. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << 0;  if (in == end) break;
    case 1:  packed |= *in++ << 3;  if (in == end) break;
    case 2:  packed |= *in++ << 6;  if (in == end) break;
    case 3:  packed |= *in++ << 9;  if (in == end) break;
    case 4:  packed |= *in++ << 12; if (in == end) break;
    case 5:  packed |= *in++ << 15; if (in == end) break;
    case 6:  packed |= *in++ << 18; if (in == end) break;
    case 7:  packed |= *in++ << 21; if (in == end) break;
    case 8:  packed |= *in++ << 24; if (in == end) break;
    case 9:  packed |= *in++ << 27; if (in == end) break;
    case 10:
      packed |= *in << 30;
      *out++ = packed;
      packed = *in++ >> 2;
      if (in == end) break;
    case 11: packed |= *in++ << 1;  if (in == end) break;
    case 12: packed |= *in++ << 4;  if (in == end) break;
    case 13: packed |= *in++ << 7;  if (in == end) break;
    case 14: packed |= *in++ << 10; if (in == end) break;
    case 15: packed |= *in++ << 13; if (in == end) break;
    case 16: packed |= *in++ << 16; if (in == end) break;
    case 17: packed |= *in++ << 19; if (in == end) break;
    case 18: packed |= *in++ << 22; if (in == end) break;
    case 19: packed |= *in++ << 25; if (in == end) break;
    case 20: packed |= *in++ << 28; if (in == end) break;
    case 21:
      packed |= *in << 31;
      *out++ = packed;
      packed = *in++ >> 1;
      if (in == end) break;
    case 22: packed |= *in++ << 2;  if (in == end) break;
    case 23: packed |= *in++ << 5;  if (in == end) break;
    case 24: packed |= *in++ << 8;  if (in == end) break;
    case 25: packed |= *in++ << 11; if (in == end) break;
    case 26: packed |= *in++ << 14; if (in == end) break;
    case 27: packed |= *in++ << 17; if (in == end) break;
    case 28: packed |= *in++ << 20; if (in == end) break;
    case 29: packed |= *in++ << 23; if (in == end) break;
    case 30: packed |= *in++ << 26; if (in == end) break;
    case 31: packed |= *in++ << 29;
      *out++ = packed;
      packed = 0;
      break;
  }
  assert(in == end);

  /* Partially filled last word: merge with the destination bits above the last item. */
  endBit = (count * 3u + startBit) % 32;
  if (endBit != 0)
  {
    packed |= *out & ~(((uint32_t)1 << endBit) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 3-bit items from the packed word stream `in`, starting at
 * item index `offset`, and stores each item in its own word of `out`.
 *
 * Layout: item i occupies bits [3*i, 3*i + 3) of the stream, least significant
 * bit first, and may straddle two consecutive 32-bit words (items 10 and 21 of
 * every group of 32 do). A group of 32 items always fills a whole number of
 * words, so once `in` has been advanced only `offset % 32` matters.
 *
 * in     - packed source words
 * offset - index of the first item to extract
 * out    - destination; receives `count` values
 * count  - number of items to extract; 0 is a no-op
 *
 * Structure: an unrolled do/while entered through a switch (Duff's device)
 * runs up to the last 32-item boundary, then a second switch handles the
 * remaining (< 32) items, testing for the end after each one. Every case in
 * both switches falls through on purpose.
 */
void __PackedArray_unpack_3(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t mask = (uint32_t)((1ULL << 3) - 1);
  uint32_t packed;
  const uint32_t* __restrict end;

  /* With nothing to extract the tail switch would start with out == end,
     never observe equality again and run past both buffers. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)3) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Unsigned on purpose: 32 * n must not be a signed multiplication. */
    uint32_t n;

    n = (uint32_t)(((uint64_t)count + offset) / 32);
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 3) % 32)) & mask;
        case 1:
          *out++ = (packed >> ((1 * 3) % 32)) & mask;
        case 2:
          *out++ = (packed >> ((2 * 3) % 32)) & mask;
        case 3:
          *out++ = (packed >> ((3 * 3) % 32)) & mask;
        case 4:
          *out++ = (packed >> ((4 * 3) % 32)) & mask;
        case 5:
          *out++ = (packed >> ((5 * 3) % 32)) & mask;
        case 6:
          *out++ = (packed >> ((6 * 3) % 32)) & mask;
        case 7:
          *out++ = (packed >> ((7 * 3) % 32)) & mask;
        case 8:
          *out++ = (packed >> ((8 * 3) % 32)) & mask;
        case 9:
          *out++ = (packed >> ((9 * 3) % 32)) & mask;
        case 10:
        {
          /* Item straddles two words: low bits from the current word, the
             remaining high bits from the next one. */
          uint32_t low, high;
          low = packed >> ((10 * 3) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 3) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((10 * 3) % 32)) << (32 - ((10 * 3) % 32))));
        }
        case 11:
          *out++ = (packed >> ((11 * 3) % 32)) & mask;
        case 12:
          *out++ = (packed >> ((12 * 3) % 32)) & mask;
        case 13:
          *out++ = (packed >> ((13 * 3) % 32)) & mask;
        case 14:
          *out++ = (packed >> ((14 * 3) % 32)) & mask;
        case 15:
          *out++ = (packed >> ((15 * 3) % 32)) & mask;
        case 16:
          *out++ = (packed >> ((16 * 3) % 32)) & mask;
        case 17:
          *out++ = (packed >> ((17 * 3) % 32)) & mask;
        case 18:
          *out++ = (packed >> ((18 * 3) % 32)) & mask;
        case 19:
          *out++ = (packed >> ((19 * 3) % 32)) & mask;
        case 20:
          *out++ = (packed >> ((20 * 3) % 32)) & mask;
        case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 3) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 3) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((21 * 3) % 32)) << (32 - ((21 * 3) % 32))));
        }
        case 22:
          *out++ = (packed >> ((22 * 3) % 32)) & mask;
        case 23:
          *out++ = (packed >> ((23 * 3) % 32)) & mask;
        case 24:
          *out++ = (packed >> ((24 * 3) % 32)) & mask;
        case 25:
          *out++ = (packed >> ((25 * 3) % 32)) & mask;
        case 26:
          *out++ = (packed >> ((26 * 3) % 32)) & mask;
        case 27:
          *out++ = (packed >> ((27 * 3) % 32)) & mask;
        case 28:
          *out++ = (packed >> ((28 * 3) % 32)) & mask;
        case 29:
          *out++ = (packed >> ((29 * 3) % 32)) & mask;
        case 30:
          *out++ = (packed >> ((30 * 3) % 32)) & mask;
        case 31:
          *out++ = (packed >> ((31 * 3) % 32)) & mask;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The leftover items start a fresh group in the next word. */
    packed = *++in;
    offset = 0;
  }

  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 3) % 32)) & mask;
      if (out == end) break;
    case 1:
      *out++ = (packed >> ((1 * 3) % 32)) & mask;
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 3) % 32)) & mask;
      if (out == end) break;
    case 3:
      *out++ = (packed >> ((3 * 3) % 32)) & mask;
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 3) % 32)) & mask;
      if (out == end) break;
    case 5:
      *out++ = (packed >> ((5 * 3) % 32)) & mask;
      if (out == end) break;
    case 6:
      *out++ = (packed >> ((6 * 3) % 32)) & mask;
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 3) % 32)) & mask;
      if (out == end) break;
    case 8:
      *out++ = (packed >> ((8 * 3) % 32)) & mask;
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 3) % 32)) & mask;
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 3) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 3) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((10 * 3) % 32)) << (32 - ((10 * 3) % 32))));
    }
      if (out == end) break;
    case 11:
      *out++ = (packed >> ((11 * 3) % 32)) & mask;
      if (out == end) break;
    case 12:
      *out++ = (packed >> ((12 * 3) % 32)) & mask;
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 3) % 32)) & mask;
      if (out == end) break;
    case 14:
      *out++ = (packed >> ((14 * 3) % 32)) & mask;
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 3) % 32)) & mask;
      if (out == end) break;
    case 16:
      *out++ = (packed >> ((16 * 3) % 32)) & mask;
      if (out == end) break;
    case 17:
      *out++ = (packed >> ((17 * 3) % 32)) & mask;
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 3) % 32)) & mask;
      if (out == end) break;
    case 19:
      *out++ = (packed >> ((19 * 3) % 32)) & mask;
      if (out == end) break;
    case 20:
      *out++ = (packed >> ((20 * 3) % 32)) & mask;
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 3) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 3) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((21 * 3) % 32)) << (32 - ((21 * 3) % 32))));
    }
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 3) % 32)) & mask;
      if (out == end) break;
    case 23:
      *out++ = (packed >> ((23 * 3) % 32)) & mask;
      if (out == end) break;
    case 24:
      *out++ = (packed >> ((24 * 3) % 32)) & mask;
      if (out == end) break;
    case 25:
      *out++ = (packed >> ((25 * 3) % 32)) & mask;
      if (out == end) break;
    case 26:
      *out++ = (packed >> ((26 * 3) % 32)) & mask;
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 3) % 32)) & mask;
      if (out == end) break;
    case 28:
      *out++ = (packed >> ((28 * 3) % 32)) & mask;
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 3) % 32)) & mask;
      if (out == end) break;
    case 30:
      *out++ = (packed >> ((30 * 3) % 32)) & mask;
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 3) % 32)) & mask;
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 4-bit items read from `in` (one item per 32-bit word) into the
 * packed word stream `out`, starting at item index `offset`.
 *
 * Layout: item i occupies bits [4*i, 4*i + 4) of the stream, least significant
 * bit first; eight items fill one word, so no item straddles a word boundary.
 * Bits of `out` that belong to items before `offset` or after the last
 * written item are preserved.
 *
 * Input values are OR-ed in without masking, so each must already fit in
 * 4 bits; higher bits would spill into the neighbouring items.
 *
 * out    - packed destination words
 * offset - index of the first item to write
 * in     - source values, `count` of them
 * count  - number of items to write; 0 is a no-op
 *
 * Structure: an unrolled do/while entered through a switch (Duff's device)
 * runs up to the last 32-item boundary, then a second switch handles the
 * remaining (< 32) items, testing for the end after each one. Every case in
 * both switches falls through on purpose.
 */
void __PackedArray_pack_4(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* With nothing to write the tail switch would start with in == end, never
     observe equality again and run past both buffers. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)4) / 32;
  startBit = ((uint64_t)offset * (uint64_t)4) % 32;
  /* Keep the bits of the first word that precede the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Unsigned on purpose: 32 * n must not be a signed multiplication. */
    uint32_t n;

    n = (uint32_t)(((uint64_t)count + offset) / 32);
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 4) % 32);
        case 1:
          packed |= *in++ << ((1 * 4) % 32);
        case 2:
          packed |= *in++ << ((2 * 4) % 32);
        case 3:
          packed |= *in++ << ((3 * 4) % 32);
        case 4:
          packed |= *in++ << ((4 * 4) % 32);
        case 5:
          packed |= *in++ << ((5 * 4) % 32);
        case 6:
          packed |= *in++ << ((6 * 4) % 32);
        case 7:
          packed |= *in++ << ((7 * 4) % 32);
          /* Word complete: flush it and start an empty one. */
          *out++ = packed;
          packed = 0;
        case 8:
          packed |= *in++ << ((8 * 4) % 32);
        case 9:
          packed |= *in++ << ((9 * 4) % 32);
        case 10:
          packed |= *in++ << ((10 * 4) % 32);
        case 11:
          packed |= *in++ << ((11 * 4) % 32);
        case 12:
          packed |= *in++ << ((12 * 4) % 32);
        case 13:
          packed |= *in++ << ((13 * 4) % 32);
        case 14:
          packed |= *in++ << ((14 * 4) % 32);
        case 15:
          packed |= *in++ << ((15 * 4) % 32);
          *out++ = packed;
          packed = 0;
        case 16:
          packed |= *in++ << ((16 * 4) % 32);
        case 17:
          packed |= *in++ << ((17 * 4) % 32);
        case 18:
          packed |= *in++ << ((18 * 4) % 32);
        case 19:
          packed |= *in++ << ((19 * 4) % 32);
        case 20:
          packed |= *in++ << ((20 * 4) % 32);
        case 21:
          packed |= *in++ << ((21 * 4) % 32);
        case 22:
          packed |= *in++ << ((22 * 4) % 32);
        case 23:
          packed |= *in++ << ((23 * 4) % 32);
          *out++ = packed;
          packed = 0;
        case 24:
          packed |= *in++ << ((24 * 4) % 32);
        case 25:
          packed |= *in++ << ((25 * 4) % 32);
        case 26:
          packed |= *in++ << ((26 * 4) % 32);
        case 27:
          packed |= *in++ << ((27 * 4) % 32);
        case 28:
          packed |= *in++ << ((28 * 4) % 32);
        case 29:
          packed |= *in++ << ((29 * 4) % 32);
        case 30:
          packed |= *in++ << ((30 * 4) % 32);
        case 31:
          packed |= *in++ << ((31 * 4) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The leftover items start a fresh group at bit 0 of the next word. */
    offset = 0;
    startBit = 0;
  }

  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 4) % 32);
      if (in == end) break;
    case 1:
      packed |= *in++ << ((1 * 4) % 32);
      if (in == end) break;
    case 2:
      packed |= *in++ << ((2 * 4) % 32);
      if (in == end) break;
    case 3:
      packed |= *in++ << ((3 * 4) % 32);
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 4) % 32);
      if (in == end) break;
    case 5:
      packed |= *in++ << ((5 * 4) % 32);
      if (in == end) break;
    case 6:
      packed |= *in++ << ((6 * 4) % 32);
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 4) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 8:
      packed |= *in++ << ((8 * 4) % 32);
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 4) % 32);
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 4) % 32);
      if (in == end) break;
    case 11:
      packed |= *in++ << ((11 * 4) % 32);
      if (in == end) break;
    case 12:
      packed |= *in++ << ((12 * 4) % 32);
      if (in == end) break;
    case 13:
      packed |= *in++ << ((13 * 4) % 32);
      if (in == end) break;
    case 14:
      packed |= *in++ << ((14 * 4) % 32);
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 4) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 4) % 32);
      if (in == end) break;
    case 17:
      packed |= *in++ << ((17 * 4) % 32);
      if (in == end) break;
    case 18:
      packed |= *in++ << ((18 * 4) % 32);
      if (in == end) break;
    case 19:
      packed |= *in++ << ((19 * 4) % 32);
      if (in == end) break;
    case 20:
      packed |= *in++ << ((20 * 4) % 32);
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 4) % 32);
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 4) % 32);
      if (in == end) break;
    case 23:
      packed |= *in++ << ((23 * 4) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 4) % 32);
      if (in == end) break;
    case 25:
      packed |= *in++ << ((25 * 4) % 32);
      if (in == end) break;
    case 26:
      packed |= *in++ << ((26 * 4) % 32);
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 4) % 32);
      if (in == end) break;
    case 28:
      packed |= *in++ << ((28 * 4) % 32);
      if (in == end) break;
    case 29:
      packed |= *in++ << ((29 * 4) % 32);
      if (in == end) break;
    case 30:
      packed |= *in++ << ((30 * 4) % 32);
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 4) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  /* The last word is only partially filled: merge in the destination bits
     that lie above the last written item before storing it. */
  if ((count * 4 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)4 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 4-bit items from the packed word stream `in`, starting at
 * item index `offset`, and stores each item in its own word of `out`.
 *
 * Layout: item i occupies bits [4*i, 4*i + 4) of the stream, least significant
 * bit first; eight items fill one word, so no item straddles a word boundary
 * and a new word is loaded after every eighth item.
 *
 * in     - packed source words
 * offset - index of the first item to extract
 * out    - destination; receives `count` values
 * count  - number of items to extract; 0 is a no-op
 *
 * Structure: an unrolled do/while entered through a switch (Duff's device)
 * runs up to the last 32-item boundary, then a second switch handles the
 * remaining (< 32) items, testing for the end after each one. Every case in
 * both switches falls through on purpose.
 */
void __PackedArray_unpack_4(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t mask = (uint32_t)((1ULL << 4) - 1);
  uint32_t packed;
  const uint32_t* __restrict end;

  /* With nothing to extract the tail switch would start with out == end,
     never observe equality again and run past both buffers. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)4) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Unsigned on purpose: 32 * n must not be a signed multiplication. */
    uint32_t n;

    n = (uint32_t)(((uint64_t)count + offset) / 32);
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 4) % 32)) & mask;
        case 1:
          *out++ = (packed >> ((1 * 4) % 32)) & mask;
        case 2:
          *out++ = (packed >> ((2 * 4) % 32)) & mask;
        case 3:
          *out++ = (packed >> ((3 * 4) % 32)) & mask;
        case 4:
          *out++ = (packed >> ((4 * 4) % 32)) & mask;
        case 5:
          *out++ = (packed >> ((5 * 4) % 32)) & mask;
        case 6:
          *out++ = (packed >> ((6 * 4) % 32)) & mask;
        case 7:
          *out++ = (packed >> ((7 * 4) % 32)) & mask;
          /* Word exhausted: load the next one. */
          packed = *++in;
        case 8:
          *out++ = (packed >> ((8 * 4) % 32)) & mask;
        case 9:
          *out++ = (packed >> ((9 * 4) % 32)) & mask;
        case 10:
          *out++ = (packed >> ((10 * 4) % 32)) & mask;
        case 11:
          *out++ = (packed >> ((11 * 4) % 32)) & mask;
        case 12:
          *out++ = (packed >> ((12 * 4) % 32)) & mask;
        case 13:
          *out++ = (packed >> ((13 * 4) % 32)) & mask;
        case 14:
          *out++ = (packed >> ((14 * 4) % 32)) & mask;
        case 15:
          *out++ = (packed >> ((15 * 4) % 32)) & mask;
          packed = *++in;
        case 16:
          *out++ = (packed >> ((16 * 4) % 32)) & mask;
        case 17:
          *out++ = (packed >> ((17 * 4) % 32)) & mask;
        case 18:
          *out++ = (packed >> ((18 * 4) % 32)) & mask;
        case 19:
          *out++ = (packed >> ((19 * 4) % 32)) & mask;
        case 20:
          *out++ = (packed >> ((20 * 4) % 32)) & mask;
        case 21:
          *out++ = (packed >> ((21 * 4) % 32)) & mask;
        case 22:
          *out++ = (packed >> ((22 * 4) % 32)) & mask;
        case 23:
          *out++ = (packed >> ((23 * 4) % 32)) & mask;
          packed = *++in;
        case 24:
          *out++ = (packed >> ((24 * 4) % 32)) & mask;
        case 25:
          *out++ = (packed >> ((25 * 4) % 32)) & mask;
        case 26:
          *out++ = (packed >> ((26 * 4) % 32)) & mask;
        case 27:
          *out++ = (packed >> ((27 * 4) % 32)) & mask;
        case 28:
          *out++ = (packed >> ((28 * 4) % 32)) & mask;
        case 29:
          *out++ = (packed >> ((29 * 4) % 32)) & mask;
        case 30:
          *out++ = (packed >> ((30 * 4) % 32)) & mask;
        case 31:
          *out++ = (packed >> ((31 * 4) % 32)) & mask;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The leftover items start a fresh group in the next word. */
    packed = *++in;
    offset = 0;
  }

  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 4) % 32)) & mask;
      if (out == end) break;
    case 1:
      *out++ = (packed >> ((1 * 4) % 32)) & mask;
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 4) % 32)) & mask;
      if (out == end) break;
    case 3:
      *out++ = (packed >> ((3 * 4) % 32)) & mask;
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 4) % 32)) & mask;
      if (out == end) break;
    case 5:
      *out++ = (packed >> ((5 * 4) % 32)) & mask;
      if (out == end) break;
    case 6:
      *out++ = (packed >> ((6 * 4) % 32)) & mask;
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 4) % 32)) & mask;
      if (out == end) break;
      /* Only reached when more items follow, so the load stays in bounds. */
      packed = *++in;
    case 8:
      *out++ = (packed >> ((8 * 4) % 32)) & mask;
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 4) % 32)) & mask;
      if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 4) % 32)) & mask;
      if (out == end) break;
    case 11:
      *out++ = (packed >> ((11 * 4) % 32)) & mask;
      if (out == end) break;
    case 12:
      *out++ = (packed >> ((12 * 4) % 32)) & mask;
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 4) % 32)) & mask;
      if (out == end) break;
    case 14:
      *out++ = (packed >> ((14 * 4) % 32)) & mask;
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 4) % 32)) & mask;
      if (out == end) break;
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 4) % 32)) & mask;
      if (out == end) break;
    case 17:
      *out++ = (packed >> ((17 * 4) % 32)) & mask;
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 4) % 32)) & mask;
      if (out == end) break;
    case 19:
      *out++ = (packed >> ((19 * 4) % 32)) & mask;
      if (out == end) break;
    case 20:
      *out++ = (packed >> ((20 * 4) % 32)) & mask;
      if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 4) % 32)) & mask;
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 4) % 32)) & mask;
      if (out == end) break;
    case 23:
      *out++ = (packed >> ((23 * 4) % 32)) & mask;
      if (out == end) break;
      packed = *++in;
    case 24:
      *out++ = (packed >> ((24 * 4) % 32)) & mask;
      if (out == end) break;
    case 25:
      *out++ = (packed >> ((25 * 4) % 32)) & mask;
      if (out == end) break;
    case 26:
      *out++ = (packed >> ((26 * 4) % 32)) & mask;
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 4) % 32)) & mask;
      if (out == end) break;
    case 28:
      *out++ = (packed >> ((28 * 4) % 32)) & mask;
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 4) % 32)) & mask;
      if (out == end) break;
    case 30:
      *out++ = (packed >> ((30 * 4) % 32)) & mask;
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 4) % 32)) & mask;
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 5-bit items read from `in` (one item per 32-bit word) into the
 * packed word stream `out`, starting at item index `offset`.
 *
 * Layout: item i occupies bits [5*i, 5*i + 5) of the stream, least significant
 * bit first, and may straddle two consecutive 32-bit words (items 6, 12, 19
 * and 25 of every group of 32 do). A group of 32 items fills exactly five
 * words, so once `out` has been advanced only `offset % 32` matters.
 * Bits of `out` that belong to items before `offset` or after the last
 * written item are preserved.
 *
 * Input values are OR-ed in without masking, so each must already fit in
 * 5 bits; higher bits would spill into the neighbouring items.
 *
 * out    - packed destination words
 * offset - index of the first item to write
 * in     - source values, `count` of them
 * count  - number of items to write; 0 is a no-op
 *
 * Structure: an unrolled do/while entered through a switch (Duff's device)
 * runs up to the last 32-item boundary, then a second switch handles the
 * remaining (< 32) items, testing for the end after each one. Every case in
 * both switches falls through on purpose.
 */
void __PackedArray_pack_5(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* With nothing to write the tail switch would start with in == end, never
     observe equality again and run past both buffers. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)5) / 32;
  startBit = ((uint64_t)offset * (uint64_t)5) % 32;
  /* Keep the bits of the first word that precede the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Unsigned on purpose: 32 * n must not be a signed multiplication. */
    uint32_t n;

    n = (uint32_t)(((uint64_t)count + offset) / 32);
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 5) % 32);
        case 1:
          packed |= *in++ << ((1 * 5) % 32);
        case 2:
          packed |= *in++ << ((2 * 5) % 32);
        case 3:
          packed |= *in++ << ((3 * 5) % 32);
        case 4:
          packed |= *in++ << ((4 * 5) % 32);
        case 5:
          packed |= *in++ << ((5 * 5) % 32);
        case 6:
          /* Item straddles two words: its low bits complete the current
             word, its high bits seed the next one. */
          packed |= *in << ((6 * 5) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 5) % 32));
        case 7:
          packed |= *in++ << ((7 * 5) % 32);
        case 8:
          packed |= *in++ << ((8 * 5) % 32);
        case 9:
          packed |= *in++ << ((9 * 5) % 32);
        case 10:
          packed |= *in++ << ((10 * 5) % 32);
        case 11:
          packed |= *in++ << ((11 * 5) % 32);
        case 12:
          packed |= *in << ((12 * 5) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 5) % 32));
        case 13:
          packed |= *in++ << ((13 * 5) % 32);
        case 14:
          packed |= *in++ << ((14 * 5) % 32);
        case 15:
          packed |= *in++ << ((15 * 5) % 32);
        case 16:
          packed |= *in++ << ((16 * 5) % 32);
        case 17:
          packed |= *in++ << ((17 * 5) % 32);
        case 18:
          packed |= *in++ << ((18 * 5) % 32);
        case 19:
          packed |= *in << ((19 * 5) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 5) % 32));
        case 20:
          packed |= *in++ << ((20 * 5) % 32);
        case 21:
          packed |= *in++ << ((21 * 5) % 32);
        case 22:
          packed |= *in++ << ((22 * 5) % 32);
        case 23:
          packed |= *in++ << ((23 * 5) % 32);
        case 24:
          packed |= *in++ << ((24 * 5) % 32);
        case 25:
          packed |= *in << ((25 * 5) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 5) % 32));
        case 26:
          packed |= *in++ << ((26 * 5) % 32);
        case 27:
          packed |= *in++ << ((27 * 5) % 32);
        case 28:
          packed |= *in++ << ((28 * 5) % 32);
        case 29:
          packed |= *in++ << ((29 * 5) % 32);
        case 30:
          packed |= *in++ << ((30 * 5) % 32);
        case 31:
          packed |= *in++ << ((31 * 5) % 32);
          /* Group ends exactly on a word boundary: flush and start empty. */
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The leftover items start a fresh group at bit 0 of the next word. */
    offset = 0;
    startBit = 0;
  }

  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 5) % 32);
      if (in == end) break;
    case 1:
      packed |= *in++ << ((1 * 5) % 32);
      if (in == end) break;
    case 2:
      packed |= *in++ << ((2 * 5) % 32);
      if (in == end) break;
    case 3:
      packed |= *in++ << ((3 * 5) % 32);
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 5) % 32);
      if (in == end) break;
    case 5:
      packed |= *in++ << ((5 * 5) % 32);
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 5) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 5) % 32));
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 5) % 32);
      if (in == end) break;
    case 8:
      packed |= *in++ << ((8 * 5) % 32);
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 5) % 32);
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 5) % 32);
      if (in == end) break;
    case 11:
      packed |= *in++ << ((11 * 5) % 32);
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 5) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 5) % 32));
      if (in == end) break;
    case 13:
      packed |= *in++ << ((13 * 5) % 32);
      if (in == end) break;
    case 14:
      packed |= *in++ << ((14 * 5) % 32);
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 5) % 32);
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 5) % 32);
      if (in == end) break;
    case 17:
      packed |= *in++ << ((17 * 5) % 32);
      if (in == end) break;
    case 18:
      packed |= *in++ << ((18 * 5) % 32);
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 5) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 5) % 32));
      if (in == end) break;
    case 20:
      packed |= *in++ << ((20 * 5) % 32);
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 5) % 32);
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 5) % 32);
      if (in == end) break;
    case 23:
      packed |= *in++ << ((23 * 5) % 32);
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 5) % 32);
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 5) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 5) % 32));
      if (in == end) break;
    case 26:
      packed |= *in++ << ((26 * 5) % 32);
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 5) % 32);
      if (in == end) break;
    case 28:
      packed |= *in++ << ((28 * 5) % 32);
      if (in == end) break;
    case 29:
      packed |= *in++ << ((29 * 5) % 32);
      if (in == end) break;
    case 30:
      packed |= *in++ << ((30 * 5) % 32);
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 5) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  /* The last word is only partially filled: merge in the destination bits
     that lie above the last written item before storing it. */
  if ((count * 5 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)5 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 5-bit items from the packed word stream `in`, starting at
 * item index `offset`, and stores each item in its own word of `out`.
 *
 * Layout: item i occupies bits [5*i, 5*i + 5) of the stream, least significant
 * bit first, and may straddle two consecutive 32-bit words (items 6, 12, 19
 * and 25 of every group of 32 do). A group of 32 items fills exactly five
 * words, so once `in` has been advanced only `offset % 32` matters.
 *
 * in     - packed source words
 * offset - index of the first item to extract
 * out    - destination; receives `count` values
 * count  - number of items to extract; 0 is a no-op
 *
 * Structure: an unrolled do/while entered through a switch (Duff's device)
 * runs up to the last 32-item boundary, then a second switch handles the
 * remaining (< 32) items, testing for the end after each one. Every case in
 * both switches falls through on purpose.
 */
void __PackedArray_unpack_5(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t mask = (uint32_t)((1ULL << 5) - 1);
  uint32_t packed;
  const uint32_t* __restrict end;

  /* With nothing to extract the tail switch would start with out == end,
     never observe equality again and run past both buffers. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)5) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Unsigned on purpose: 32 * n must not be a signed multiplication. */
    uint32_t n;

    n = (uint32_t)(((uint64_t)count + offset) / 32);
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 5) % 32)) & mask;
        case 1:
          *out++ = (packed >> ((1 * 5) % 32)) & mask;
        case 2:
          *out++ = (packed >> ((2 * 5) % 32)) & mask;
        case 3:
          *out++ = (packed >> ((3 * 5) % 32)) & mask;
        case 4:
          *out++ = (packed >> ((4 * 5) % 32)) & mask;
        case 5:
          *out++ = (packed >> ((5 * 5) % 32)) & mask;
        case 6:
        {
          /* Item straddles two words: low bits from the current word, the
             remaining high bits from the next one. */
          uint32_t low, high;
          low = packed >> ((6 * 5) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 5) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((6 * 5) % 32)) << (32 - ((6 * 5) % 32))));
        }
        case 7:
          *out++ = (packed >> ((7 * 5) % 32)) & mask;
        case 8:
          *out++ = (packed >> ((8 * 5) % 32)) & mask;
        case 9:
          *out++ = (packed >> ((9 * 5) % 32)) & mask;
        case 10:
          *out++ = (packed >> ((10 * 5) % 32)) & mask;
        case 11:
          *out++ = (packed >> ((11 * 5) % 32)) & mask;
        case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 5) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 5) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((12 * 5) % 32)) << (32 - ((12 * 5) % 32))));
        }
        case 13:
          *out++ = (packed >> ((13 * 5) % 32)) & mask;
        case 14:
          *out++ = (packed >> ((14 * 5) % 32)) & mask;
        case 15:
          *out++ = (packed >> ((15 * 5) % 32)) & mask;
        case 16:
          *out++ = (packed >> ((16 * 5) % 32)) & mask;
        case 17:
          *out++ = (packed >> ((17 * 5) % 32)) & mask;
        case 18:
          *out++ = (packed >> ((18 * 5) % 32)) & mask;
        case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 5) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 5) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((19 * 5) % 32)) << (32 - ((19 * 5) % 32))));
        }
        case 20:
          *out++ = (packed >> ((20 * 5) % 32)) & mask;
        case 21:
          *out++ = (packed >> ((21 * 5) % 32)) & mask;
        case 22:
          *out++ = (packed >> ((22 * 5) % 32)) & mask;
        case 23:
          *out++ = (packed >> ((23 * 5) % 32)) & mask;
        case 24:
          *out++ = (packed >> ((24 * 5) % 32)) & mask;
        case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 5) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 5) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((25 * 5) % 32)) << (32 - ((25 * 5) % 32))));
        }
        case 26:
          *out++ = (packed >> ((26 * 5) % 32)) & mask;
        case 27:
          *out++ = (packed >> ((27 * 5) % 32)) & mask;
        case 28:
          *out++ = (packed >> ((28 * 5) % 32)) & mask;
        case 29:
          *out++ = (packed >> ((29 * 5) % 32)) & mask;
        case 30:
          *out++ = (packed >> ((30 * 5) % 32)) & mask;
        case 31:
          *out++ = (packed >> ((31 * 5) % 32)) & mask;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The leftover items start a fresh group in the next word. */
    packed = *++in;
    offset = 0;
  }

  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 5) % 32)) & mask;
      if (out == end) break;
    case 1:
      *out++ = (packed >> ((1 * 5) % 32)) & mask;
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 5) % 32)) & mask;
      if (out == end) break;
    case 3:
      *out++ = (packed >> ((3 * 5) % 32)) & mask;
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 5) % 32)) & mask;
      if (out == end) break;
    case 5:
      *out++ = (packed >> ((5 * 5) % 32)) & mask;
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 5) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 5) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((6 * 5) % 32)) << (32 - ((6 * 5) % 32))));
    }
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 5) % 32)) & mask;
      if (out == end) break;
    case 8:
      *out++ = (packed >> ((8 * 5) % 32)) & mask;
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 5) % 32)) & mask;
      if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 5) % 32)) & mask;
      if (out == end) break;
    case 11:
      *out++ = (packed >> ((11 * 5) % 32)) & mask;
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 5) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 5) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((12 * 5) % 32)) << (32 - ((12 * 5) % 32))));
    }
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 5) % 32)) & mask;
      if (out == end) break;
    case 14:
      *out++ = (packed >> ((14 * 5) % 32)) & mask;
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 5) % 32)) & mask;
      if (out == end) break;
    case 16:
      *out++ = (packed >> ((16 * 5) % 32)) & mask;
      if (out == end) break;
    case 17:
      *out++ = (packed >> ((17 * 5) % 32)) & mask;
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 5) % 32)) & mask;
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 5) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 5) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((19 * 5) % 32)) << (32 - ((19 * 5) % 32))));
    }
      if (out == end) break;
    case 20:
      *out++ = (packed >> ((20 * 5) % 32)) & mask;
      if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 5) % 32)) & mask;
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 5) % 32)) & mask;
      if (out == end) break;
    case 23:
      *out++ = (packed >> ((23 * 5) % 32)) & mask;
      if (out == end) break;
    case 24:
      *out++ = (packed >> ((24 * 5) % 32)) & mask;
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 5) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 5) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((25 * 5) % 32)) << (32 - ((25 * 5) % 32))));
    }
      if (out == end) break;
    case 26:
      *out++ = (packed >> ((26 * 5) % 32)) & mask;
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 5) % 32)) & mask;
      if (out == end) break;
    case 28:
      *out++ = (packed >> ((28 * 5) % 32)) & mask;
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 5) % 32)) & mask;
      if (out == end) break;
    case 30:
      *out++ = (packed >> ((30 * 5) % 32)) & mask;
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 5) % 32)) & mask;
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` items from `in` into the 6-bits-per-item stream `out`,
 * starting at item index `offset`.
 *
 * out    - packed destination words; `out` is first advanced to the word that
 *          holds item `offset`.
 * offset - index (in items, not bits) of the first item to write.
 * in     - source items, one per uint32_t. Values are OR-ed in without
 *          masking, so each one must already fit in 6 bits.
 * count  - number of items to write; 0 is a no-op.
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved.
 */
void __PackedArray_pack_6(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see in == end and
     would run through to case 31, reading and writing out of bounds. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)6) / 32;
  startBit = ((uint64_t)offset * (uint64_t)6) % 32;
  /* Keep the bits below the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  /* Position inside a group of 32 items (32 items * 6 bits = 6 whole words). */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    int32_t n;
    /* n = number of passes through the unrolled body; afterwards `count`
       holds only the items left for the tail switch. */
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    /* The switch jumps into the middle of the unrolled do/while body
       (Duff's-device style); every case intentionally falls through. */
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 6) % 32);
      case 1:
        packed |= *in++ << ((1 * 6) % 32);
      case 2:
        packed |= *in++ << ((2 * 6) % 32);
      case 3:
        packed |= *in++ << ((3 * 6) % 32);
      case 4:
        packed |= *in++ << ((4 * 6) % 32);
      case 5:
        /* Item straddles two words: flush, then carry its high bits. */
        packed |= *in << ((5 * 6) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((5 * 6) % 32));
      case 6:
        packed |= *in++ << ((6 * 6) % 32);
      case 7:
        packed |= *in++ << ((7 * 6) % 32);
      case 8:
        packed |= *in++ << ((8 * 6) % 32);
      case 9:
        packed |= *in++ << ((9 * 6) % 32);
      case 10:
        packed |= *in << ((10 * 6) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((10 * 6) % 32));
      case 11:
        packed |= *in++ << ((11 * 6) % 32);
      case 12:
        packed |= *in++ << ((12 * 6) % 32);
      case 13:
        packed |= *in++ << ((13 * 6) % 32);
      case 14:
        packed |= *in++ << ((14 * 6) % 32);
      case 15:
        /* Item ends exactly on a word boundary: flush and start empty. */
        packed |= *in++ << ((15 * 6) % 32);
        *out++ = packed;
        packed = 0;
      case 16:
        packed |= *in++ << ((16 * 6) % 32);
      case 17:
        packed |= *in++ << ((17 * 6) % 32);
      case 18:
        packed |= *in++ << ((18 * 6) % 32);
      case 19:
        packed |= *in++ << ((19 * 6) % 32);
      case 20:
        packed |= *in++ << ((20 * 6) % 32);
      case 21:
        packed |= *in << ((21 * 6) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((21 * 6) % 32));
      case 22:
        packed |= *in++ << ((22 * 6) % 32);
      case 23:
        packed |= *in++ << ((23 * 6) % 32);
      case 24:
        packed |= *in++ << ((24 * 6) % 32);
      case 25:
        packed |= *in++ << ((25 * 6) % 32);
      case 26:
        packed |= *in << ((26 * 6) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((26 * 6) % 32));
      case 27:
        packed |= *in++ << ((27 * 6) % 32);
      case 28:
        packed |= *in++ << ((28 * 6) % 32);
      case 29:
        packed |= *in++ << ((29 * 6) % 32);
      case 30:
        packed |= *in++ << ((30 * 6) % 32);
      case 31:
        packed |= *in++ << ((31 * 6) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts on a fresh 32-item group. */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than 32 items remain; same unrolled cases, but each one
     stops as soon as the input is exhausted. */
  end = in + count;
  switch (offset)
  {
  case 0:
    packed |= *in++ << ((0 * 6) % 32);
    if (in == end) break;
  case 1:
    packed |= *in++ << ((1 * 6) % 32);
    if (in == end) break;
  case 2:
    packed |= *in++ << ((2 * 6) % 32);
    if (in == end) break;
  case 3:
    packed |= *in++ << ((3 * 6) % 32);
    if (in == end) break;
  case 4:
    packed |= *in++ << ((4 * 6) % 32);
    if (in == end) break;
  case 5:
    packed |= *in << ((5 * 6) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((5 * 6) % 32));
    if (in == end) break;
  case 6:
    packed |= *in++ << ((6 * 6) % 32);
    if (in == end) break;
  case 7:
    packed |= *in++ << ((7 * 6) % 32);
    if (in == end) break;
  case 8:
    packed |= *in++ << ((8 * 6) % 32);
    if (in == end) break;
  case 9:
    packed |= *in++ << ((9 * 6) % 32);
    if (in == end) break;
  case 10:
    packed |= *in << ((10 * 6) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((10 * 6) % 32));
    if (in == end) break;
  case 11:
    packed |= *in++ << ((11 * 6) % 32);
    if (in == end) break;
  case 12:
    packed |= *in++ << ((12 * 6) % 32);
    if (in == end) break;
  case 13:
    packed |= *in++ << ((13 * 6) % 32);
    if (in == end) break;
  case 14:
    packed |= *in++ << ((14 * 6) % 32);
    if (in == end) break;
  case 15:
    packed |= *in++ << ((15 * 6) % 32);
    *out++ = packed;
    packed = 0;
    if (in == end) break;
  case 16:
    packed |= *in++ << ((16 * 6) % 32);
    if (in == end) break;
  case 17:
    packed |= *in++ << ((17 * 6) % 32);
    if (in == end) break;
  case 18:
    packed |= *in++ << ((18 * 6) % 32);
    if (in == end) break;
  case 19:
    packed |= *in++ << ((19 * 6) % 32);
    if (in == end) break;
  case 20:
    packed |= *in++ << ((20 * 6) % 32);
    if (in == end) break;
  case 21:
    packed |= *in << ((21 * 6) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((21 * 6) % 32));
    if (in == end) break;
  case 22:
    packed |= *in++ << ((22 * 6) % 32);
    if (in == end) break;
  case 23:
    packed |= *in++ << ((23 * 6) % 32);
    if (in == end) break;
  case 24:
    packed |= *in++ << ((24 * 6) % 32);
    if (in == end) break;
  case 25:
    packed |= *in++ << ((25 * 6) % 32);
    if (in == end) break;
  case 26:
    packed |= *in << ((26 * 6) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((26 * 6) % 32));
    if (in == end) break;
  case 27:
    packed |= *in++ << ((27 * 6) % 32);
    if (in == end) break;
  case 28:
    packed |= *in++ << ((28 * 6) % 32);
    if (in == end) break;
  case 29:
    packed |= *in++ << ((29 * 6) % 32);
    if (in == end) break;
  case 30:
    packed |= *in++ << ((30 * 6) % 32);
    if (in == end) break;
  case 31:
    packed |= *in++ << ((31 * 6) % 32);
    *out++ = packed;
    packed = 0;
    if (in == end) break;
  }
  assert(in == end);

  /* A partially filled last word is still pending: merge it with the bits
     already stored in *out above the last written item. */
  if ((count * 6 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)6 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` items from the 6-bits-per-item stream `in`, starting at
 * item index `offset`, into `out` (one item per uint32_t).
 *
 * in     - packed source words; `in` is first advanced to the word that
 *          holds item `offset`.
 * offset - index (in items, not bits) of the first item to read.
 * out    - destination for the unpacked items.
 * count  - number of items to read; 0 is a no-op.
 */
void __PackedArray_unpack_6(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see out == end and
     would run through to case 31, writing past the destination. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)6) / 32;
  packed = *in;
  /* Position inside a group of 32 items (32 items * 6 bits = 6 whole words). */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    int32_t n;
    /* n = number of passes through the unrolled body; afterwards `count`
       holds only the items left for the tail switch. */
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    /* The switch jumps into the middle of the unrolled do/while body
       (Duff's-device style); every case intentionally falls through. */
    switch (offset)
    {
      do
      {
        /* Only reached when looping back: load the first word of the next group. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 1:
        *out++ = (packed >> ((1 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 2:
        *out++ = (packed >> ((2 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 3:
        *out++ = (packed >> ((3 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 4:
        *out++ = (packed >> ((4 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 5:
      {
        /* Item straddles two words: low bits come from the current word,
           the remaining high bits from the next one. */
        uint32_t low, high;
        low = packed >> ((5 * 6) % 32);
        packed = *++in;
        high = packed << (32 - ((5 * 6) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((5 * 6) % 32)) << (32 - ((5 * 6) % 32))));
      }
      case 6:
        *out++ = (packed >> ((6 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 7:
        *out++ = (packed >> ((7 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 8:
        *out++ = (packed >> ((8 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 9:
        *out++ = (packed >> ((9 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 10:
      {
        uint32_t low, high;
        low = packed >> ((10 * 6) % 32);
        packed = *++in;
        high = packed << (32 - ((10 * 6) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((10 * 6) % 32)) << (32 - ((10 * 6) % 32))));
      }
      case 11:
        *out++ = (packed >> ((11 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 12:
        *out++ = (packed >> ((12 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 13:
        *out++ = (packed >> ((13 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 14:
        *out++ = (packed >> ((14 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 15:
        *out++ = (packed >> ((15 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
        /* Item 15 ends exactly on a word boundary: load the next word. */
        packed = *++in;
      case 16:
        *out++ = (packed >> ((16 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 17:
        *out++ = (packed >> ((17 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 18:
        *out++ = (packed >> ((18 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 19:
        *out++ = (packed >> ((19 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 20:
        *out++ = (packed >> ((20 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 21:
      {
        uint32_t low, high;
        low = packed >> ((21 * 6) % 32);
        packed = *++in;
        high = packed << (32 - ((21 * 6) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((21 * 6) % 32)) << (32 - ((21 * 6) % 32))));
      }
      case 22:
        *out++ = (packed >> ((22 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 23:
        *out++ = (packed >> ((23 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 24:
        *out++ = (packed >> ((24 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 25:
        *out++ = (packed >> ((25 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 26:
      {
        uint32_t low, high;
        low = packed >> ((26 * 6) % 32);
        packed = *++in;
        high = packed << (32 - ((26 * 6) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((26 * 6) % 32)) << (32 - ((26 * 6) % 32))));
      }
      case 27:
        *out++ = (packed >> ((27 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 28:
        *out++ = (packed >> ((28 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 29:
        *out++ = (packed >> ((29 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 30:
        *out++ = (packed >> ((30 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      case 31:
        *out++ = (packed >> ((31 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts on a fresh 32-item group. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than 32 items remain; same unrolled cases, but each one
     stops as soon as the output is complete. */
  end = out + count;
  switch (offset)
  {
  case 0:
    *out++ = (packed >> ((0 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 1:
    *out++ = (packed >> ((1 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 2:
    *out++ = (packed >> ((2 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 3:
    *out++ = (packed >> ((3 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 4:
    *out++ = (packed >> ((4 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 5:
  {
    uint32_t low, high;
    low = packed >> ((5 * 6) % 32);
    packed = *++in;
    high = packed << (32 - ((5 * 6) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((5 * 6) % 32)) << (32 - ((5 * 6) % 32))));
  }
    if (out == end) break;
  case 6:
    *out++ = (packed >> ((6 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 7:
    *out++ = (packed >> ((7 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 8:
    *out++ = (packed >> ((8 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 9:
    *out++ = (packed >> ((9 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 10:
  {
    uint32_t low, high;
    low = packed >> ((10 * 6) % 32);
    packed = *++in;
    high = packed << (32 - ((10 * 6) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((10 * 6) % 32)) << (32 - ((10 * 6) % 32))));
  }
    if (out == end) break;
  case 11:
    *out++ = (packed >> ((11 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 12:
    *out++ = (packed >> ((12 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 13:
    *out++ = (packed >> ((13 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 14:
    *out++ = (packed >> ((14 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 15:
    *out++ = (packed >> ((15 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
    /* Only load the next word when more items follow. */
    packed = *++in;
  case 16:
    *out++ = (packed >> ((16 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 17:
    *out++ = (packed >> ((17 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 18:
    *out++ = (packed >> ((18 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 19:
    *out++ = (packed >> ((19 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 20:
    *out++ = (packed >> ((20 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 21:
  {
    uint32_t low, high;
    low = packed >> ((21 * 6) % 32);
    packed = *++in;
    high = packed << (32 - ((21 * 6) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((21 * 6) % 32)) << (32 - ((21 * 6) % 32))));
  }
    if (out == end) break;
  case 22:
    *out++ = (packed >> ((22 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 23:
    *out++ = (packed >> ((23 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 24:
    *out++ = (packed >> ((24 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 25:
    *out++ = (packed >> ((25 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 26:
  {
    uint32_t low, high;
    low = packed >> ((26 * 6) % 32);
    packed = *++in;
    high = packed << (32 - ((26 * 6) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 6) - 1) >> (32 - ((26 * 6) % 32)) << (32 - ((26 * 6) % 32))));
  }
    if (out == end) break;
  case 27:
    *out++ = (packed >> ((27 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 28:
    *out++ = (packed >> ((28 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 29:
    *out++ = (packed >> ((29 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 30:
    *out++ = (packed >> ((30 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  case 31:
    *out++ = (packed >> ((31 * 6) % 32)) & (uint32_t)((1ULL << 6) - 1);
    if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` items from `in` into the 7-bits-per-item stream `out`,
 * starting at item index `offset`.
 *
 * out    - packed destination words; `out` is first advanced to the word that
 *          holds item `offset`.
 * offset - index (in items, not bits) of the first item to write.
 * in     - source items, one per uint32_t. Values are OR-ed in without
 *          masking, so each one must already fit in 7 bits.
 * count  - number of items to write; 0 is a no-op.
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved.
 */
void __PackedArray_pack_7(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see in == end and
     would run through to case 31, reading and writing out of bounds. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)7) / 32;
  startBit = ((uint64_t)offset * (uint64_t)7) % 32;
  /* Keep the bits below the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  /* Position inside a group of 32 items (32 items * 7 bits = 7 whole words). */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    int32_t n;
    /* n = number of passes through the unrolled body; afterwards `count`
       holds only the items left for the tail switch. */
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    /* The switch jumps into the middle of the unrolled do/while body
       (Duff's-device style); every case intentionally falls through. */
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 7) % 32);
      case 1:
        packed |= *in++ << ((1 * 7) % 32);
      case 2:
        packed |= *in++ << ((2 * 7) % 32);
      case 3:
        packed |= *in++ << ((3 * 7) % 32);
      case 4:
        /* Item straddles two words: flush, then carry its high bits. */
        packed |= *in << ((4 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((4 * 7) % 32));
      case 5:
        packed |= *in++ << ((5 * 7) % 32);
      case 6:
        packed |= *in++ << ((6 * 7) % 32);
      case 7:
        packed |= *in++ << ((7 * 7) % 32);
      case 8:
        packed |= *in++ << ((8 * 7) % 32);
      case 9:
        packed |= *in << ((9 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((9 * 7) % 32));
      case 10:
        packed |= *in++ << ((10 * 7) % 32);
      case 11:
        packed |= *in++ << ((11 * 7) % 32);
      case 12:
        packed |= *in++ << ((12 * 7) % 32);
      case 13:
        packed |= *in << ((13 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((13 * 7) % 32));
      case 14:
        packed |= *in++ << ((14 * 7) % 32);
      case 15:
        packed |= *in++ << ((15 * 7) % 32);
      case 16:
        packed |= *in++ << ((16 * 7) % 32);
      case 17:
        packed |= *in++ << ((17 * 7) % 32);
      case 18:
        packed |= *in << ((18 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((18 * 7) % 32));
      case 19:
        packed |= *in++ << ((19 * 7) % 32);
      case 20:
        packed |= *in++ << ((20 * 7) % 32);
      case 21:
        packed |= *in++ << ((21 * 7) % 32);
      case 22:
        packed |= *in << ((22 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((22 * 7) % 32));
      case 23:
        packed |= *in++ << ((23 * 7) % 32);
      case 24:
        packed |= *in++ << ((24 * 7) % 32);
      case 25:
        packed |= *in++ << ((25 * 7) % 32);
      case 26:
        packed |= *in++ << ((26 * 7) % 32);
      case 27:
        packed |= *in << ((27 * 7) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((27 * 7) % 32));
      case 28:
        packed |= *in++ << ((28 * 7) % 32);
      case 29:
        packed |= *in++ << ((29 * 7) % 32);
      case 30:
        packed |= *in++ << ((30 * 7) % 32);
      case 31:
        /* Item ends exactly on a word boundary: flush and start empty. */
        packed |= *in++ << ((31 * 7) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts on a fresh 32-item group. */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than 32 items remain; same unrolled cases, but each one
     stops as soon as the input is exhausted. */
  end = in + count;
  switch (offset)
  {
  case 0:
    packed |= *in++ << ((0 * 7) % 32);
    if (in == end) break;
  case 1:
    packed |= *in++ << ((1 * 7) % 32);
    if (in == end) break;
  case 2:
    packed |= *in++ << ((2 * 7) % 32);
    if (in == end) break;
  case 3:
    packed |= *in++ << ((3 * 7) % 32);
    if (in == end) break;
  case 4:
    packed |= *in << ((4 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((4 * 7) % 32));
    if (in == end) break;
  case 5:
    packed |= *in++ << ((5 * 7) % 32);
    if (in == end) break;
  case 6:
    packed |= *in++ << ((6 * 7) % 32);
    if (in == end) break;
  case 7:
    packed |= *in++ << ((7 * 7) % 32);
    if (in == end) break;
  case 8:
    packed |= *in++ << ((8 * 7) % 32);
    if (in == end) break;
  case 9:
    packed |= *in << ((9 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((9 * 7) % 32));
    if (in == end) break;
  case 10:
    packed |= *in++ << ((10 * 7) % 32);
    if (in == end) break;
  case 11:
    packed |= *in++ << ((11 * 7) % 32);
    if (in == end) break;
  case 12:
    packed |= *in++ << ((12 * 7) % 32);
    if (in == end) break;
  case 13:
    packed |= *in << ((13 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((13 * 7) % 32));
    if (in == end) break;
  case 14:
    packed |= *in++ << ((14 * 7) % 32);
    if (in == end) break;
  case 15:
    packed |= *in++ << ((15 * 7) % 32);
    if (in == end) break;
  case 16:
    packed |= *in++ << ((16 * 7) % 32);
    if (in == end) break;
  case 17:
    packed |= *in++ << ((17 * 7) % 32);
    if (in == end) break;
  case 18:
    packed |= *in << ((18 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((18 * 7) % 32));
    if (in == end) break;
  case 19:
    packed |= *in++ << ((19 * 7) % 32);
    if (in == end) break;
  case 20:
    packed |= *in++ << ((20 * 7) % 32);
    if (in == end) break;
  case 21:
    packed |= *in++ << ((21 * 7) % 32);
    if (in == end) break;
  case 22:
    packed |= *in << ((22 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((22 * 7) % 32));
    if (in == end) break;
  case 23:
    packed |= *in++ << ((23 * 7) % 32);
    if (in == end) break;
  case 24:
    packed |= *in++ << ((24 * 7) % 32);
    if (in == end) break;
  case 25:
    packed |= *in++ << ((25 * 7) % 32);
    if (in == end) break;
  case 26:
    packed |= *in++ << ((26 * 7) % 32);
    if (in == end) break;
  case 27:
    packed |= *in << ((27 * 7) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((27 * 7) % 32));
    if (in == end) break;
  case 28:
    packed |= *in++ << ((28 * 7) % 32);
    if (in == end) break;
  case 29:
    packed |= *in++ << ((29 * 7) % 32);
    if (in == end) break;
  case 30:
    packed |= *in++ << ((30 * 7) % 32);
    if (in == end) break;
  case 31:
    packed |= *in++ << ((31 * 7) % 32);
    *out++ = packed;
    packed = 0;
    if (in == end) break;
  }
  assert(in == end);

  /* A partially filled last word is still pending: merge it with the bits
     already stored in *out above the last written item. */
  if ((count * 7 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)7 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` items from the 7-bits-per-item stream `in`, starting at
 * item index `offset`, into `out` (one item per uint32_t).
 *
 * in     - packed source words; `in` is first advanced to the word that
 *          holds item `offset`.
 * offset - index (in items, not bits) of the first item to read.
 * out    - destination for the unpacked items.
 * count  - number of items to read; 0 is a no-op.
 */
void __PackedArray_unpack_7(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see out == end and
     would run through to case 31, writing past the destination. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)7) / 32;
  packed = *in;
  /* Position inside a group of 32 items (32 items * 7 bits = 7 whole words). */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    int32_t n;
    /* n = number of passes through the unrolled body; afterwards `count`
       holds only the items left for the tail switch. */
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    /* The switch jumps into the middle of the unrolled do/while body
       (Duff's-device style); every case intentionally falls through. */
    switch (offset)
    {
      do
      {
        /* Only reached when looping back: load the first word of the next group. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 1:
        *out++ = (packed >> ((1 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 2:
        *out++ = (packed >> ((2 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 3:
        *out++ = (packed >> ((3 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 4:
      {
        /* Item straddles two words: low bits come from the current word,
           the remaining high bits from the next one. */
        uint32_t low, high;
        low = packed >> ((4 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((4 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((4 * 7) % 32)) << (32 - ((4 * 7) % 32))));
      }
      case 5:
        *out++ = (packed >> ((5 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 6:
        *out++ = (packed >> ((6 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 7:
        *out++ = (packed >> ((7 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 8:
        *out++ = (packed >> ((8 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 9:
      {
        uint32_t low, high;
        low = packed >> ((9 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((9 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((9 * 7) % 32)) << (32 - ((9 * 7) % 32))));
      }
      case 10:
        *out++ = (packed >> ((10 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 11:
        *out++ = (packed >> ((11 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 12:
        *out++ = (packed >> ((12 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 13:
      {
        uint32_t low, high;
        low = packed >> ((13 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((13 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((13 * 7) % 32)) << (32 - ((13 * 7) % 32))));
      }
      case 14:
        *out++ = (packed >> ((14 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 15:
        *out++ = (packed >> ((15 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 16:
        *out++ = (packed >> ((16 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 17:
        *out++ = (packed >> ((17 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 18:
      {
        uint32_t low, high;
        low = packed >> ((18 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((18 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((18 * 7) % 32)) << (32 - ((18 * 7) % 32))));
      }
      case 19:
        *out++ = (packed >> ((19 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 20:
        *out++ = (packed >> ((20 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 21:
        *out++ = (packed >> ((21 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 22:
      {
        uint32_t low, high;
        low = packed >> ((22 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((22 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((22 * 7) % 32)) << (32 - ((22 * 7) % 32))));
      }
      case 23:
        *out++ = (packed >> ((23 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 24:
        *out++ = (packed >> ((24 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 25:
        *out++ = (packed >> ((25 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 26:
        *out++ = (packed >> ((26 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 27:
      {
        uint32_t low, high;
        low = packed >> ((27 * 7) % 32);
        packed = *++in;
        high = packed << (32 - ((27 * 7) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((27 * 7) % 32)) << (32 - ((27 * 7) % 32))));
      }
      case 28:
        *out++ = (packed >> ((28 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 29:
        *out++ = (packed >> ((29 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 30:
        *out++ = (packed >> ((30 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      case 31:
        *out++ = (packed >> ((31 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts on a fresh 32-item group. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than 32 items remain; same unrolled cases, but each one
     stops as soon as the output is complete. */
  end = out + count;
  switch (offset)
  {
  case 0:
    *out++ = (packed >> ((0 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 1:
    *out++ = (packed >> ((1 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 2:
    *out++ = (packed >> ((2 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 3:
    *out++ = (packed >> ((3 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 4:
  {
    uint32_t low, high;
    low = packed >> ((4 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((4 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((4 * 7) % 32)) << (32 - ((4 * 7) % 32))));
  }
    if (out == end) break;
  case 5:
    *out++ = (packed >> ((5 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 6:
    *out++ = (packed >> ((6 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 7:
    *out++ = (packed >> ((7 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 8:
    *out++ = (packed >> ((8 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 9:
  {
    uint32_t low, high;
    low = packed >> ((9 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((9 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((9 * 7) % 32)) << (32 - ((9 * 7) % 32))));
  }
    if (out == end) break;
  case 10:
    *out++ = (packed >> ((10 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 11:
    *out++ = (packed >> ((11 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 12:
    *out++ = (packed >> ((12 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 13:
  {
    uint32_t low, high;
    low = packed >> ((13 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((13 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((13 * 7) % 32)) << (32 - ((13 * 7) % 32))));
  }
    if (out == end) break;
  case 14:
    *out++ = (packed >> ((14 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 15:
    *out++ = (packed >> ((15 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 16:
    *out++ = (packed >> ((16 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 17:
    *out++ = (packed >> ((17 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 18:
  {
    uint32_t low, high;
    low = packed >> ((18 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((18 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((18 * 7) % 32)) << (32 - ((18 * 7) % 32))));
  }
    if (out == end) break;
  case 19:
    *out++ = (packed >> ((19 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 20:
    *out++ = (packed >> ((20 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 21:
    *out++ = (packed >> ((21 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 22:
  {
    uint32_t low, high;
    low = packed >> ((22 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((22 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((22 * 7) % 32)) << (32 - ((22 * 7) % 32))));
  }
    if (out == end) break;
  case 23:
    *out++ = (packed >> ((23 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 24:
    *out++ = (packed >> ((24 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 25:
    *out++ = (packed >> ((25 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 26:
    *out++ = (packed >> ((26 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 27:
  {
    uint32_t low, high;
    low = packed >> ((27 * 7) % 32);
    packed = *++in;
    high = packed << (32 - ((27 * 7) % 32));
    *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 7) - 1) >> (32 - ((27 * 7) % 32)) << (32 - ((27 * 7) % 32))));
  }
    if (out == end) break;
  case 28:
    *out++ = (packed >> ((28 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 29:
    *out++ = (packed >> ((29 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 30:
    *out++ = (packed >> ((30 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  case 31:
    *out++ = (packed >> ((31 * 7) % 32)) & (uint32_t)((1ULL << 7) - 1);
    if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 8-bit items read from `in` into the 32-bit words of `out`,
 * starting at item index `offset`.
 *
 * Layout: item i occupies bits [i * 8, i * 8 + 8) of the packed stream, where
 * bit 0 is the least significant bit of out[0]. Bits of the first and last
 * touched words that lie outside the written range are preserved.
 *
 * Values are OR-ed in without masking, so every input is expected to fit in
 * 8 bits. A `count` of zero is a no-op.
 */
void __PackedArray_pack_8(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below consumes an item before it tests `in == end`;
     with nothing to pack it would run through every remaining case, overrun
     both buffers and trip the final assert. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)8) / 32;
  startBit = ((uint64_t)offset * (uint64_t)8) % 32;
  /* keep the bits already stored below the first item to write */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  /* 32 items always span a whole number of words, so from here on only the
     position inside such a 32-item group matters */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body: the first pass starts
       at `offset`, every further pass handles a complete 32-item group */
    int32_t n;

    n = (count + offset) / 32;
    count -= 32 * n - offset; /* items left over for the tail switch */

    /* Duff's device: jump into the middle of the unrolled loop body.
       Every case intentionally falls through to the next one. */
    switch (offset)
    {
      do
      {
        case 0:  packed |= *in++ << ((0 * 8) % 32);
        case 1:  packed |= *in++ << ((1 * 8) % 32);
        case 2:  packed |= *in++ << ((2 * 8) % 32);
        case 3:  packed |= *in++ << ((3 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 4:  packed |= *in++ << ((4 * 8) % 32);
        case 5:  packed |= *in++ << ((5 * 8) % 32);
        case 6:  packed |= *in++ << ((6 * 8) % 32);
        case 7:  packed |= *in++ << ((7 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 8:  packed |= *in++ << ((8 * 8) % 32);
        case 9:  packed |= *in++ << ((9 * 8) % 32);
        case 10: packed |= *in++ << ((10 * 8) % 32);
        case 11: packed |= *in++ << ((11 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 12: packed |= *in++ << ((12 * 8) % 32);
        case 13: packed |= *in++ << ((13 * 8) % 32);
        case 14: packed |= *in++ << ((14 * 8) % 32);
        case 15: packed |= *in++ << ((15 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 16: packed |= *in++ << ((16 * 8) % 32);
        case 17: packed |= *in++ << ((17 * 8) % 32);
        case 18: packed |= *in++ << ((18 * 8) % 32);
        case 19: packed |= *in++ << ((19 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 20: packed |= *in++ << ((20 * 8) % 32);
        case 21: packed |= *in++ << ((21 * 8) % 32);
        case 22: packed |= *in++ << ((22 * 8) % 32);
        case 23: packed |= *in++ << ((23 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 24: packed |= *in++ << ((24 * 8) % 32);
        case 25: packed |= *in++ << ((25 * 8) % 32);
        case 26: packed |= *in++ << ((26 * 8) % 32);
        case 27: packed |= *in++ << ((27 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
        case 28: packed |= *in++ << ((28 * 8) % 32);
        case 29: packed |= *in++ << ((29 * 8) % 32);
        case 30: packed |= *in++ << ((30 * 8) % 32);
        case 31: packed |= *in++ << ((31 * 8) % 32);
                 *out++ = packed;
                 packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the unrolled loop stopped on a word boundary */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than a full group remains. Same unrolled body, but stop as
     soon as the last input item has been consumed. Cases fall through. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << ((0 * 8) % 32);
             if (in == end) break;
    case 1:  packed |= *in++ << ((1 * 8) % 32);
             if (in == end) break;
    case 2:  packed |= *in++ << ((2 * 8) % 32);
             if (in == end) break;
    case 3:  packed |= *in++ << ((3 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 4:  packed |= *in++ << ((4 * 8) % 32);
             if (in == end) break;
    case 5:  packed |= *in++ << ((5 * 8) % 32);
             if (in == end) break;
    case 6:  packed |= *in++ << ((6 * 8) % 32);
             if (in == end) break;
    case 7:  packed |= *in++ << ((7 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 8:  packed |= *in++ << ((8 * 8) % 32);
             if (in == end) break;
    case 9:  packed |= *in++ << ((9 * 8) % 32);
             if (in == end) break;
    case 10: packed |= *in++ << ((10 * 8) % 32);
             if (in == end) break;
    case 11: packed |= *in++ << ((11 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 12: packed |= *in++ << ((12 * 8) % 32);
             if (in == end) break;
    case 13: packed |= *in++ << ((13 * 8) % 32);
             if (in == end) break;
    case 14: packed |= *in++ << ((14 * 8) % 32);
             if (in == end) break;
    case 15: packed |= *in++ << ((15 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 16: packed |= *in++ << ((16 * 8) % 32);
             if (in == end) break;
    case 17: packed |= *in++ << ((17 * 8) % 32);
             if (in == end) break;
    case 18: packed |= *in++ << ((18 * 8) % 32);
             if (in == end) break;
    case 19: packed |= *in++ << ((19 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 20: packed |= *in++ << ((20 * 8) % 32);
             if (in == end) break;
    case 21: packed |= *in++ << ((21 * 8) % 32);
             if (in == end) break;
    case 22: packed |= *in++ << ((22 * 8) % 32);
             if (in == end) break;
    case 23: packed |= *in++ << ((23 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 24: packed |= *in++ << ((24 * 8) % 32);
             if (in == end) break;
    case 25: packed |= *in++ << ((25 * 8) % 32);
             if (in == end) break;
    case 26: packed |= *in++ << ((26 * 8) % 32);
             if (in == end) break;
    case 27: packed |= *in++ << ((27 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 28: packed |= *in++ << ((28 * 8) % 32);
             if (in == end) break;
    case 29: packed |= *in++ << ((29 * 8) % 32);
             if (in == end) break;
    case 30: packed |= *in++ << ((30 * 8) % 32);
             if (in == end) break;
    case 31: packed |= *in++ << ((31 * 8) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
  }
  assert(in == end);

  /* The last item ended in the middle of a word: merge the pending bits with
     the bits already stored above them. */
  if ((count * 8 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)8 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 8-bit items from the 32-bit words of `in`, starting at item
 * index `offset`, and stores each one zero-extended into `out`.
 *
 * Layout: item i occupies bits [i * 8, i * 8 + 8) of the packed stream, where
 * bit 0 is the least significant bit of in[0]. A `count` of zero is a no-op.
 */
void __PackedArray_unpack_8(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t mask = (uint32_t)((1ULL << 8) - 1);
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below emits an item before it tests `out == end`; with
     nothing to unpack it would run through every remaining case, overrun
     both buffers and trip the final assert. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)8) / 32;
  packed = *in;
  /* 32 items always span a whole number of words, so from here on only the
     position inside such a 32-item group matters */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body: the first pass starts
       at `offset`, every further pass handles a complete 32-item group */
    int32_t n;

    n = (count + offset) / 32;
    count -= 32 * n - offset; /* items left over for the tail switch */

    /* Duff's device: jump into the middle of the unrolled loop body.
       Every case intentionally falls through to the next one. */
    switch (offset)
    {
      do
      {
                 packed = *++in;
        case 0:  *out++ = (packed >> ((0 * 8) % 32)) & mask;
        case 1:  *out++ = (packed >> ((1 * 8) % 32)) & mask;
        case 2:  *out++ = (packed >> ((2 * 8) % 32)) & mask;
        case 3:  *out++ = (packed >> ((3 * 8) % 32)) & mask;
                 packed = *++in;
        case 4:  *out++ = (packed >> ((4 * 8) % 32)) & mask;
        case 5:  *out++ = (packed >> ((5 * 8) % 32)) & mask;
        case 6:  *out++ = (packed >> ((6 * 8) % 32)) & mask;
        case 7:  *out++ = (packed >> ((7 * 8) % 32)) & mask;
                 packed = *++in;
        case 8:  *out++ = (packed >> ((8 * 8) % 32)) & mask;
        case 9:  *out++ = (packed >> ((9 * 8) % 32)) & mask;
        case 10: *out++ = (packed >> ((10 * 8) % 32)) & mask;
        case 11: *out++ = (packed >> ((11 * 8) % 32)) & mask;
                 packed = *++in;
        case 12: *out++ = (packed >> ((12 * 8) % 32)) & mask;
        case 13: *out++ = (packed >> ((13 * 8) % 32)) & mask;
        case 14: *out++ = (packed >> ((14 * 8) % 32)) & mask;
        case 15: *out++ = (packed >> ((15 * 8) % 32)) & mask;
                 packed = *++in;
        case 16: *out++ = (packed >> ((16 * 8) % 32)) & mask;
        case 17: *out++ = (packed >> ((17 * 8) % 32)) & mask;
        case 18: *out++ = (packed >> ((18 * 8) % 32)) & mask;
        case 19: *out++ = (packed >> ((19 * 8) % 32)) & mask;
                 packed = *++in;
        case 20: *out++ = (packed >> ((20 * 8) % 32)) & mask;
        case 21: *out++ = (packed >> ((21 * 8) % 32)) & mask;
        case 22: *out++ = (packed >> ((22 * 8) % 32)) & mask;
        case 23: *out++ = (packed >> ((23 * 8) % 32)) & mask;
                 packed = *++in;
        case 24: *out++ = (packed >> ((24 * 8) % 32)) & mask;
        case 25: *out++ = (packed >> ((25 * 8) % 32)) & mask;
        case 26: *out++ = (packed >> ((26 * 8) % 32)) & mask;
        case 27: *out++ = (packed >> ((27 * 8) % 32)) & mask;
                 packed = *++in;
        case 28: *out++ = (packed >> ((28 * 8) % 32)) & mask;
        case 29: *out++ = (packed >> ((29 * 8) % 32)) & mask;
        case 30: *out++ = (packed >> ((30 * 8) % 32)) & mask;
        case 31: *out++ = (packed >> ((31 * 8) % 32)) & mask;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the unrolled loop stopped on a word boundary: load the next word */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than a full group remains. Same unrolled body, but stop as
     soon as the last output item has been written; the next word is only
     loaded when another item is still needed. Cases fall through. */
  end = out + count;
  switch (offset)
  {
    case 0:  *out++ = (packed >> ((0 * 8) % 32)) & mask;
             if (out == end) break;
    case 1:  *out++ = (packed >> ((1 * 8) % 32)) & mask;
             if (out == end) break;
    case 2:  *out++ = (packed >> ((2 * 8) % 32)) & mask;
             if (out == end) break;
    case 3:  *out++ = (packed >> ((3 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 4:  *out++ = (packed >> ((4 * 8) % 32)) & mask;
             if (out == end) break;
    case 5:  *out++ = (packed >> ((5 * 8) % 32)) & mask;
             if (out == end) break;
    case 6:  *out++ = (packed >> ((6 * 8) % 32)) & mask;
             if (out == end) break;
    case 7:  *out++ = (packed >> ((7 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 8:  *out++ = (packed >> ((8 * 8) % 32)) & mask;
             if (out == end) break;
    case 9:  *out++ = (packed >> ((9 * 8) % 32)) & mask;
             if (out == end) break;
    case 10: *out++ = (packed >> ((10 * 8) % 32)) & mask;
             if (out == end) break;
    case 11: *out++ = (packed >> ((11 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 12: *out++ = (packed >> ((12 * 8) % 32)) & mask;
             if (out == end) break;
    case 13: *out++ = (packed >> ((13 * 8) % 32)) & mask;
             if (out == end) break;
    case 14: *out++ = (packed >> ((14 * 8) % 32)) & mask;
             if (out == end) break;
    case 15: *out++ = (packed >> ((15 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 16: *out++ = (packed >> ((16 * 8) % 32)) & mask;
             if (out == end) break;
    case 17: *out++ = (packed >> ((17 * 8) % 32)) & mask;
             if (out == end) break;
    case 18: *out++ = (packed >> ((18 * 8) % 32)) & mask;
             if (out == end) break;
    case 19: *out++ = (packed >> ((19 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 20: *out++ = (packed >> ((20 * 8) % 32)) & mask;
             if (out == end) break;
    case 21: *out++ = (packed >> ((21 * 8) % 32)) & mask;
             if (out == end) break;
    case 22: *out++ = (packed >> ((22 * 8) % 32)) & mask;
             if (out == end) break;
    case 23: *out++ = (packed >> ((23 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 24: *out++ = (packed >> ((24 * 8) % 32)) & mask;
             if (out == end) break;
    case 25: *out++ = (packed >> ((25 * 8) % 32)) & mask;
             if (out == end) break;
    case 26: *out++ = (packed >> ((26 * 8) % 32)) & mask;
             if (out == end) break;
    case 27: *out++ = (packed >> ((27 * 8) % 32)) & mask;
             if (out == end) break;
             packed = *++in;
    case 28: *out++ = (packed >> ((28 * 8) % 32)) & mask;
             if (out == end) break;
    case 29: *out++ = (packed >> ((29 * 8) % 32)) & mask;
             if (out == end) break;
    case 30: *out++ = (packed >> ((30 * 8) % 32)) & mask;
             if (out == end) break;
    case 31: *out++ = (packed >> ((31 * 8) % 32)) & mask;
             if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 9-bit items read from `in` into the 32-bit words of `out`,
 * starting at item index `offset`.
 *
 * Layout: item i occupies bits [i * 9, i * 9 + 9) of the packed stream, where
 * bit 0 is the least significant bit of out[0]; an item may therefore
 * straddle two consecutive words. Bits of the first and last touched words
 * that lie outside the written range are preserved.
 *
 * Values are OR-ed in without masking, so every input is expected to fit in
 * 9 bits. A `count` of zero is a no-op.
 */
void __PackedArray_pack_9(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below consumes an item before it tests `in == end`;
     with nothing to pack it would run through every remaining case, overrun
     both buffers and trip the final assert. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)9) / 32;
  startBit = ((uint64_t)offset * (uint64_t)9) % 32;
  /* keep the bits already stored below the first item to write */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  /* 32 items always span a whole number of words, so from here on only the
     position inside such a 32-item group matters */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body: the first pass starts
       at `offset`, every further pass handles a complete 32-item group */
    int32_t n;

    n = (count + offset) / 32;
    count -= 32 * n - offset; /* items left over for the tail switch */

    /* Duff's device: jump into the middle of the unrolled loop body.
       Every case intentionally falls through to the next one. Items that
       straddle a word boundary store their low bits, flush the word and
       seed the next word with their remaining high bits. */
    switch (offset)
    {
      do
      {
        case 0:  packed |= *in++ << ((0 * 9) % 32);
        case 1:  packed |= *in++ << ((1 * 9) % 32);
        case 2:  packed |= *in++ << ((2 * 9) % 32);
        case 3:  packed |= *in << ((3 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((3 * 9) % 32));
        case 4:  packed |= *in++ << ((4 * 9) % 32);
        case 5:  packed |= *in++ << ((5 * 9) % 32);
        case 6:  packed |= *in++ << ((6 * 9) % 32);
        case 7:  packed |= *in << ((7 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((7 * 9) % 32));
        case 8:  packed |= *in++ << ((8 * 9) % 32);
        case 9:  packed |= *in++ << ((9 * 9) % 32);
        case 10: packed |= *in << ((10 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((10 * 9) % 32));
        case 11: packed |= *in++ << ((11 * 9) % 32);
        case 12: packed |= *in++ << ((12 * 9) % 32);
        case 13: packed |= *in++ << ((13 * 9) % 32);
        case 14: packed |= *in << ((14 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((14 * 9) % 32));
        case 15: packed |= *in++ << ((15 * 9) % 32);
        case 16: packed |= *in++ << ((16 * 9) % 32);
        case 17: packed |= *in << ((17 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((17 * 9) % 32));
        case 18: packed |= *in++ << ((18 * 9) % 32);
        case 19: packed |= *in++ << ((19 * 9) % 32);
        case 20: packed |= *in++ << ((20 * 9) % 32);
        case 21: packed |= *in << ((21 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((21 * 9) % 32));
        case 22: packed |= *in++ << ((22 * 9) % 32);
        case 23: packed |= *in++ << ((23 * 9) % 32);
        case 24: packed |= *in << ((24 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((24 * 9) % 32));
        case 25: packed |= *in++ << ((25 * 9) % 32);
        case 26: packed |= *in++ << ((26 * 9) % 32);
        case 27: packed |= *in++ << ((27 * 9) % 32);
        case 28: packed |= *in << ((28 * 9) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((28 * 9) % 32));
        case 29: packed |= *in++ << ((29 * 9) % 32);
        case 30: packed |= *in++ << ((30 * 9) % 32);
        case 31: packed |= *in++ << ((31 * 9) % 32);
                 *out++ = packed;
                 packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the unrolled loop stopped on a word boundary */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than a full group remains. Same unrolled body, but stop as
     soon as the last input item has been consumed. Cases fall through. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << ((0 * 9) % 32);
             if (in == end) break;
    case 1:  packed |= *in++ << ((1 * 9) % 32);
             if (in == end) break;
    case 2:  packed |= *in++ << ((2 * 9) % 32);
             if (in == end) break;
    case 3:  packed |= *in << ((3 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((3 * 9) % 32));
             if (in == end) break;
    case 4:  packed |= *in++ << ((4 * 9) % 32);
             if (in == end) break;
    case 5:  packed |= *in++ << ((5 * 9) % 32);
             if (in == end) break;
    case 6:  packed |= *in++ << ((6 * 9) % 32);
             if (in == end) break;
    case 7:  packed |= *in << ((7 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((7 * 9) % 32));
             if (in == end) break;
    case 8:  packed |= *in++ << ((8 * 9) % 32);
             if (in == end) break;
    case 9:  packed |= *in++ << ((9 * 9) % 32);
             if (in == end) break;
    case 10: packed |= *in << ((10 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((10 * 9) % 32));
             if (in == end) break;
    case 11: packed |= *in++ << ((11 * 9) % 32);
             if (in == end) break;
    case 12: packed |= *in++ << ((12 * 9) % 32);
             if (in == end) break;
    case 13: packed |= *in++ << ((13 * 9) % 32);
             if (in == end) break;
    case 14: packed |= *in << ((14 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((14 * 9) % 32));
             if (in == end) break;
    case 15: packed |= *in++ << ((15 * 9) % 32);
             if (in == end) break;
    case 16: packed |= *in++ << ((16 * 9) % 32);
             if (in == end) break;
    case 17: packed |= *in << ((17 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((17 * 9) % 32));
             if (in == end) break;
    case 18: packed |= *in++ << ((18 * 9) % 32);
             if (in == end) break;
    case 19: packed |= *in++ << ((19 * 9) % 32);
             if (in == end) break;
    case 20: packed |= *in++ << ((20 * 9) % 32);
             if (in == end) break;
    case 21: packed |= *in << ((21 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((21 * 9) % 32));
             if (in == end) break;
    case 22: packed |= *in++ << ((22 * 9) % 32);
             if (in == end) break;
    case 23: packed |= *in++ << ((23 * 9) % 32);
             if (in == end) break;
    case 24: packed |= *in << ((24 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((24 * 9) % 32));
             if (in == end) break;
    case 25: packed |= *in++ << ((25 * 9) % 32);
             if (in == end) break;
    case 26: packed |= *in++ << ((26 * 9) % 32);
             if (in == end) break;
    case 27: packed |= *in++ << ((27 * 9) % 32);
             if (in == end) break;
    case 28: packed |= *in << ((28 * 9) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((28 * 9) % 32));
             if (in == end) break;
    case 29: packed |= *in++ << ((29 * 9) % 32);
             if (in == end) break;
    case 30: packed |= *in++ << ((30 * 9) % 32);
             if (in == end) break;
    case 31: packed |= *in++ << ((31 * 9) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
  }
  assert(in == end);

  /* The last item ended in the middle of a word: merge the pending bits with
     the bits already stored above them. */
  if ((count * 9 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)9 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 9-bit items from the 32-bit words of `in`, starting at item
 * index `offset`, and stores each one zero-extended into `out`.
 *
 * Layout: item i occupies bits [i * 9, i * 9 + 9) of the packed stream, where
 * bit 0 is the least significant bit of in[0]; an item may therefore
 * straddle two consecutive words. A `count` of zero is a no-op.
 */
void __PackedArray_unpack_9(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t mask = (uint32_t)((1ULL << 9) - 1);
  uint32_t packed;
  uint32_t low; /* low part of an item that straddles two words */
  const uint32_t* __restrict end;

  /* The tail switch below emits an item before it tests `out == end`; with
     nothing to unpack it would run through every remaining case, overrun
     both buffers and trip the final assert. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)9) / 32;
  packed = *in;
  /* 32 items always span a whole number of words, so from here on only the
     position inside such a 32-item group matters */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body: the first pass starts
       at `offset`, every further pass handles a complete 32-item group */
    int32_t n;

    n = (count + offset) / 32;
    count -= 32 * n - offset; /* items left over for the tail switch */

    /* Duff's device: jump into the middle of the unrolled loop body.
       Every case intentionally falls through to the next one.
       A straddling item takes its low bits from the top of the current word
       and its high bits from the bottom of the next one. `low` has fewer
       than 9 significant bits and the shifted next word has zeros below
       them, so OR-ing both and masking yields the item. */
    switch (offset)
    {
      do
      {
                 packed = *++in;
        case 0:  *out++ = (packed >> ((0 * 9) % 32)) & mask;
        case 1:  *out++ = (packed >> ((1 * 9) % 32)) & mask;
        case 2:  *out++ = (packed >> ((2 * 9) % 32)) & mask;
        case 3:  low = packed >> ((3 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((3 * 9) % 32)))) & mask;
        case 4:  *out++ = (packed >> ((4 * 9) % 32)) & mask;
        case 5:  *out++ = (packed >> ((5 * 9) % 32)) & mask;
        case 6:  *out++ = (packed >> ((6 * 9) % 32)) & mask;
        case 7:  low = packed >> ((7 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((7 * 9) % 32)))) & mask;
        case 8:  *out++ = (packed >> ((8 * 9) % 32)) & mask;
        case 9:  *out++ = (packed >> ((9 * 9) % 32)) & mask;
        case 10: low = packed >> ((10 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((10 * 9) % 32)))) & mask;
        case 11: *out++ = (packed >> ((11 * 9) % 32)) & mask;
        case 12: *out++ = (packed >> ((12 * 9) % 32)) & mask;
        case 13: *out++ = (packed >> ((13 * 9) % 32)) & mask;
        case 14: low = packed >> ((14 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((14 * 9) % 32)))) & mask;
        case 15: *out++ = (packed >> ((15 * 9) % 32)) & mask;
        case 16: *out++ = (packed >> ((16 * 9) % 32)) & mask;
        case 17: low = packed >> ((17 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((17 * 9) % 32)))) & mask;
        case 18: *out++ = (packed >> ((18 * 9) % 32)) & mask;
        case 19: *out++ = (packed >> ((19 * 9) % 32)) & mask;
        case 20: *out++ = (packed >> ((20 * 9) % 32)) & mask;
        case 21: low = packed >> ((21 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((21 * 9) % 32)))) & mask;
        case 22: *out++ = (packed >> ((22 * 9) % 32)) & mask;
        case 23: *out++ = (packed >> ((23 * 9) % 32)) & mask;
        case 24: low = packed >> ((24 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((24 * 9) % 32)))) & mask;
        case 25: *out++ = (packed >> ((25 * 9) % 32)) & mask;
        case 26: *out++ = (packed >> ((26 * 9) % 32)) & mask;
        case 27: *out++ = (packed >> ((27 * 9) % 32)) & mask;
        case 28: low = packed >> ((28 * 9) % 32);
                 packed = *++in;
                 *out++ = (low | (packed << (32 - ((28 * 9) % 32)))) & mask;
        case 29: *out++ = (packed >> ((29 * 9) % 32)) & mask;
        case 30: *out++ = (packed >> ((30 * 9) % 32)) & mask;
        case 31: *out++ = (packed >> ((31 * 9) % 32)) & mask;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the unrolled loop stopped on a word boundary: load the next word */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than a full group remains. Same unrolled body, but stop as
     soon as the last output item has been written. Cases fall through. */
  end = out + count;
  switch (offset)
  {
    case 0:  *out++ = (packed >> ((0 * 9) % 32)) & mask;
             if (out == end) break;
    case 1:  *out++ = (packed >> ((1 * 9) % 32)) & mask;
             if (out == end) break;
    case 2:  *out++ = (packed >> ((2 * 9) % 32)) & mask;
             if (out == end) break;
    case 3:  low = packed >> ((3 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((3 * 9) % 32)))) & mask;
             if (out == end) break;
    case 4:  *out++ = (packed >> ((4 * 9) % 32)) & mask;
             if (out == end) break;
    case 5:  *out++ = (packed >> ((5 * 9) % 32)) & mask;
             if (out == end) break;
    case 6:  *out++ = (packed >> ((6 * 9) % 32)) & mask;
             if (out == end) break;
    case 7:  low = packed >> ((7 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((7 * 9) % 32)))) & mask;
             if (out == end) break;
    case 8:  *out++ = (packed >> ((8 * 9) % 32)) & mask;
             if (out == end) break;
    case 9:  *out++ = (packed >> ((9 * 9) % 32)) & mask;
             if (out == end) break;
    case 10: low = packed >> ((10 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((10 * 9) % 32)))) & mask;
             if (out == end) break;
    case 11: *out++ = (packed >> ((11 * 9) % 32)) & mask;
             if (out == end) break;
    case 12: *out++ = (packed >> ((12 * 9) % 32)) & mask;
             if (out == end) break;
    case 13: *out++ = (packed >> ((13 * 9) % 32)) & mask;
             if (out == end) break;
    case 14: low = packed >> ((14 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((14 * 9) % 32)))) & mask;
             if (out == end) break;
    case 15: *out++ = (packed >> ((15 * 9) % 32)) & mask;
             if (out == end) break;
    case 16: *out++ = (packed >> ((16 * 9) % 32)) & mask;
             if (out == end) break;
    case 17: low = packed >> ((17 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((17 * 9) % 32)))) & mask;
             if (out == end) break;
    case 18: *out++ = (packed >> ((18 * 9) % 32)) & mask;
             if (out == end) break;
    case 19: *out++ = (packed >> ((19 * 9) % 32)) & mask;
             if (out == end) break;
    case 20: *out++ = (packed >> ((20 * 9) % 32)) & mask;
             if (out == end) break;
    case 21: low = packed >> ((21 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((21 * 9) % 32)))) & mask;
             if (out == end) break;
    case 22: *out++ = (packed >> ((22 * 9) % 32)) & mask;
             if (out == end) break;
    case 23: *out++ = (packed >> ((23 * 9) % 32)) & mask;
             if (out == end) break;
    case 24: low = packed >> ((24 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((24 * 9) % 32)))) & mask;
             if (out == end) break;
    case 25: *out++ = (packed >> ((25 * 9) % 32)) & mask;
             if (out == end) break;
    case 26: *out++ = (packed >> ((26 * 9) % 32)) & mask;
             if (out == end) break;
    case 27: *out++ = (packed >> ((27 * 9) % 32)) & mask;
             if (out == end) break;
    case 28: low = packed >> ((28 * 9) % 32);
             packed = *++in;
             *out++ = (low | (packed << (32 - ((28 * 9) % 32)))) & mask;
             if (out == end) break;
    case 29: *out++ = (packed >> ((29 * 9) % 32)) & mask;
             if (out == end) break;
    case 30: *out++ = (packed >> ((30 * 9) % 32)) & mask;
             if (out == end) break;
    case 31: *out++ = (packed >> ((31 * 9) % 32)) & mask;
             if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 10-bit items read from `in` into the 32-bit words of `out`,
 * starting at item index `offset`.
 *
 * Layout: item i occupies bits [i * 10, i * 10 + 10) of the packed stream,
 * where bit 0 is the least significant bit of out[0]; an item may therefore
 * straddle two consecutive words. Bits of the first and last touched words
 * that lie outside the written range are preserved.
 *
 * Values are OR-ed in without masking, so every input is expected to fit in
 * 10 bits. A `count` of zero is a no-op.
 */
void __PackedArray_pack_10(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below consumes an item before it tests `in == end`;
     with nothing to pack it would run through every remaining case, overrun
     both buffers and trip the final assert. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)10) / 32;
  startBit = ((uint64_t)offset * (uint64_t)10) % 32;
  /* keep the bits already stored below the first item to write */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  /* 32 items always span a whole number of words, so from here on only the
     position inside such a 32-item group matters */
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body: the first pass starts
       at `offset`, every further pass handles a complete 32-item group */
    int32_t n;

    n = (count + offset) / 32;
    count -= 32 * n - offset; /* items left over for the tail switch */

    /* Duff's device: jump into the middle of the unrolled loop body.
       Every case intentionally falls through to the next one. Items that
       straddle a word boundary store their low bits, flush the word and
       seed the next word with their remaining high bits. */
    switch (offset)
    {
      do
      {
        case 0:  packed |= *in++ << ((0 * 10) % 32);
        case 1:  packed |= *in++ << ((1 * 10) % 32);
        case 2:  packed |= *in++ << ((2 * 10) % 32);
        case 3:  packed |= *in << ((3 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((3 * 10) % 32));
        case 4:  packed |= *in++ << ((4 * 10) % 32);
        case 5:  packed |= *in++ << ((5 * 10) % 32);
        case 6:  packed |= *in << ((6 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((6 * 10) % 32));
        case 7:  packed |= *in++ << ((7 * 10) % 32);
        case 8:  packed |= *in++ << ((8 * 10) % 32);
        case 9:  packed |= *in << ((9 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((9 * 10) % 32));
        case 10: packed |= *in++ << ((10 * 10) % 32);
        case 11: packed |= *in++ << ((11 * 10) % 32);
        case 12: packed |= *in << ((12 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((12 * 10) % 32));
        case 13: packed |= *in++ << ((13 * 10) % 32);
        case 14: packed |= *in++ << ((14 * 10) % 32);
        case 15: packed |= *in++ << ((15 * 10) % 32);
                 *out++ = packed;
                 packed = 0;
        case 16: packed |= *in++ << ((16 * 10) % 32);
        case 17: packed |= *in++ << ((17 * 10) % 32);
        case 18: packed |= *in++ << ((18 * 10) % 32);
        case 19: packed |= *in << ((19 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((19 * 10) % 32));
        case 20: packed |= *in++ << ((20 * 10) % 32);
        case 21: packed |= *in++ << ((21 * 10) % 32);
        case 22: packed |= *in << ((22 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((22 * 10) % 32));
        case 23: packed |= *in++ << ((23 * 10) % 32);
        case 24: packed |= *in++ << ((24 * 10) % 32);
        case 25: packed |= *in << ((25 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((25 * 10) % 32));
        case 26: packed |= *in++ << ((26 * 10) % 32);
        case 27: packed |= *in++ << ((27 * 10) % 32);
        case 28: packed |= *in << ((28 * 10) % 32);
                 *out++ = packed;
                 packed = *in++ >> (32 - ((28 * 10) % 32));
        case 29: packed |= *in++ << ((29 * 10) % 32);
        case 30: packed |= *in++ << ((30 * 10) % 32);
        case 31: packed |= *in++ << ((31 * 10) % 32);
                 *out++ = packed;
                 packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the unrolled loop stopped on a word boundary */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than a full group remains. Same unrolled body, but stop as
     soon as the last input item has been consumed. Cases fall through. */
  end = in + count;
  switch (offset)
  {
    case 0:  packed |= *in++ << ((0 * 10) % 32);
             if (in == end) break;
    case 1:  packed |= *in++ << ((1 * 10) % 32);
             if (in == end) break;
    case 2:  packed |= *in++ << ((2 * 10) % 32);
             if (in == end) break;
    case 3:  packed |= *in << ((3 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((3 * 10) % 32));
             if (in == end) break;
    case 4:  packed |= *in++ << ((4 * 10) % 32);
             if (in == end) break;
    case 5:  packed |= *in++ << ((5 * 10) % 32);
             if (in == end) break;
    case 6:  packed |= *in << ((6 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((6 * 10) % 32));
             if (in == end) break;
    case 7:  packed |= *in++ << ((7 * 10) % 32);
             if (in == end) break;
    case 8:  packed |= *in++ << ((8 * 10) % 32);
             if (in == end) break;
    case 9:  packed |= *in << ((9 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((9 * 10) % 32));
             if (in == end) break;
    case 10: packed |= *in++ << ((10 * 10) % 32);
             if (in == end) break;
    case 11: packed |= *in++ << ((11 * 10) % 32);
             if (in == end) break;
    case 12: packed |= *in << ((12 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((12 * 10) % 32));
             if (in == end) break;
    case 13: packed |= *in++ << ((13 * 10) % 32);
             if (in == end) break;
    case 14: packed |= *in++ << ((14 * 10) % 32);
             if (in == end) break;
    case 15: packed |= *in++ << ((15 * 10) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
    case 16: packed |= *in++ << ((16 * 10) % 32);
             if (in == end) break;
    case 17: packed |= *in++ << ((17 * 10) % 32);
             if (in == end) break;
    case 18: packed |= *in++ << ((18 * 10) % 32);
             if (in == end) break;
    case 19: packed |= *in << ((19 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((19 * 10) % 32));
             if (in == end) break;
    case 20: packed |= *in++ << ((20 * 10) % 32);
             if (in == end) break;
    case 21: packed |= *in++ << ((21 * 10) % 32);
             if (in == end) break;
    case 22: packed |= *in << ((22 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((22 * 10) % 32));
             if (in == end) break;
    case 23: packed |= *in++ << ((23 * 10) % 32);
             if (in == end) break;
    case 24: packed |= *in++ << ((24 * 10) % 32);
             if (in == end) break;
    case 25: packed |= *in << ((25 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((25 * 10) % 32));
             if (in == end) break;
    case 26: packed |= *in++ << ((26 * 10) % 32);
             if (in == end) break;
    case 27: packed |= *in++ << ((27 * 10) % 32);
             if (in == end) break;
    case 28: packed |= *in << ((28 * 10) % 32);
             *out++ = packed;
             packed = *in++ >> (32 - ((28 * 10) % 32));
             if (in == end) break;
    case 29: packed |= *in++ << ((29 * 10) % 32);
             if (in == end) break;
    case 30: packed |= *in++ << ((30 * 10) % 32);
             if (in == end) break;
    case 31: packed |= *in++ << ((31 * 10) % 32);
             *out++ = packed;
             packed = 0;
             if (in == end) break;
  }
  assert(in == end);

  /* The last item ended in the middle of a word: merge the pending bits with
     the bits already stored above them. */
  if ((count * 10 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)10 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/**
 * Unpacks `count` consecutive 10-bit items from the packed words at `in`,
 * starting at item index `offset`, writing one item per element of `out`.
 *
 * Item i lives at bit position i * 10 of the word stream; 32 items span
 * exactly 10 words, so the per-item shift pattern repeats every 32 items and
 * the switch below can be indexed with `offset % 32`.
 *
 * @param in     packed source words (read only)
 * @param offset index of the first item to unpack
 * @param out    destination, must have room for `count` elements
 * @param count  number of items to unpack; 0 is a no-op
 */
void __PackedArray_unpack_10(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see out == end and
     would run through every remaining case, writing past `out`. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)10) / 32;
  packed = *in;
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Fast path: finish the current group of 32 items, then whole groups.
       The split is done in 64-bit unsigned arithmetic so that neither
       count + offset nor 32 * n can overflow for very large counts. */
    const uint64_t total = (uint64_t)count + offset;
    uint32_t n = (uint32_t)(total / 32); /* loop passes, always >= 1 here */
    count = (uint32_t)(total % 32);      /* items left for the tail */

    /* Duff's device: jump into the unrolled loop at the starting item;
       every case falls through to the next one on purpose. */
    switch (offset)
    {
      do
      {
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 1:
        *out++ = (packed >> ((1 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 2:
        *out++ = (packed >> ((2 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 3:
      {
        /* item straddles two words: low bits from this word, rest from next */
        uint32_t low, high;
        low = packed >> ((3 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((3 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((3 * 10) % 32)) << (32 - ((3 * 10) % 32))));
      }
      case 4:
        *out++ = (packed >> ((4 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 5:
        *out++ = (packed >> ((5 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 6:
      {
        uint32_t low, high;
        low = packed >> ((6 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((6 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((6 * 10) % 32)) << (32 - ((6 * 10) % 32))));
      }
      case 7:
        *out++ = (packed >> ((7 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 8:
        *out++ = (packed >> ((8 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 9:
      {
        uint32_t low, high;
        low = packed >> ((9 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((9 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((9 * 10) % 32)) << (32 - ((9 * 10) % 32))));
      }
      case 10:
        *out++ = (packed >> ((10 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 11:
        *out++ = (packed >> ((11 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 12:
      {
        uint32_t low, high;
        low = packed >> ((12 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((12 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((12 * 10) % 32)) << (32 - ((12 * 10) % 32))));
      }
      case 13:
        *out++ = (packed >> ((13 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 14:
        *out++ = (packed >> ((14 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 15:
        *out++ = (packed >> ((15 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
        /* item 15 ends exactly on a word boundary: load the next word */
        packed = *++in;
      case 16:
        *out++ = (packed >> ((16 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 17:
        *out++ = (packed >> ((17 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 18:
        *out++ = (packed >> ((18 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 19:
      {
        uint32_t low, high;
        low = packed >> ((19 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((19 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((19 * 10) % 32)) << (32 - ((19 * 10) % 32))));
      }
      case 20:
        *out++ = (packed >> ((20 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 21:
        *out++ = (packed >> ((21 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 22:
      {
        uint32_t low, high;
        low = packed >> ((22 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((22 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((22 * 10) % 32)) << (32 - ((22 * 10) % 32))));
      }
      case 23:
        *out++ = (packed >> ((23 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 24:
        *out++ = (packed >> ((24 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 25:
      {
        uint32_t low, high;
        low = packed >> ((25 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((25 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((25 * 10) % 32)) << (32 - ((25 * 10) % 32))));
      }
      case 26:
        *out++ = (packed >> ((26 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 27:
        *out++ = (packed >> ((27 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 28:
      {
        uint32_t low, high;
        low = packed >> ((28 * 10) % 32);
        packed = *++in;
        high = packed << (32 - ((28 * 10) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((28 * 10) % 32)) << (32 - ((28 * 10) % 32))));
      }
      case 29:
        *out++ = (packed >> ((29 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 30:
        *out++ = (packed >> ((30 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      case 31:
        *out++ = (packed >> ((31 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The last unrolled pass ended on a word boundary: start the tail on the
       next word, at item 0 of a fresh group. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than 32 items remain. Same unrolled sequence, but stop as
     soon as the requested number of items has been written. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 1:
      *out++ = (packed >> ((1 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((3 * 10) % 32)) << (32 - ((3 * 10) % 32))));
    }
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 5:
      *out++ = (packed >> ((5 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((6 * 10) % 32)) << (32 - ((6 * 10) % 32))));
    }
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 8:
      *out++ = (packed >> ((8 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 9:
    {
      uint32_t low, high;
      low = packed >> ((9 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((9 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((9 * 10) % 32)) << (32 - ((9 * 10) % 32))));
    }
      if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 11:
      *out++ = (packed >> ((11 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((12 * 10) % 32)) << (32 - ((12 * 10) % 32))));
    }
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 14:
      *out++ = (packed >> ((14 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
      /* only touch the next word when another item is actually needed */
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 17:
      *out++ = (packed >> ((17 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((19 * 10) % 32)) << (32 - ((19 * 10) % 32))));
    }
      if (out == end) break;
    case 20:
      *out++ = (packed >> ((20 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 22:
    {
      uint32_t low, high;
      low = packed >> ((22 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((22 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((22 * 10) % 32)) << (32 - ((22 * 10) % 32))));
    }
      if (out == end) break;
    case 23:
      *out++ = (packed >> ((23 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 24:
      *out++ = (packed >> ((24 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((25 * 10) % 32)) << (32 - ((25 * 10) % 32))));
    }
      if (out == end) break;
    case 26:
      *out++ = (packed >> ((26 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 10) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 10) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 10) - 1) >> (32 - ((28 * 10) % 32)) << (32 - ((28 * 10) % 32))));
    }
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 30:
      *out++ = (packed >> ((30 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 10) % 32)) & (uint32_t)((1ULL << 10) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/**
 * Packs `count` values from `in` as consecutive 11-bit items into the word
 * stream at `out`, starting at item index `offset`.
 *
 * Item i lives at bit position i * 11; 32 items span exactly 11 words, so the
 * per-item shift pattern repeats every 32 items and the switch below can be
 * indexed with `offset % 32`. Bits of `out` that lie before the first written
 * item or after the last one are preserved.
 *
 * Values are OR-ed in without masking, so a value wider than 11 bits spills
 * into the neighboring items.
 *
 * @param out    packed destination words
 * @param offset index of the first item to write
 * @param in     source values, one per element
 * @param count  number of items to pack; 0 is a no-op
 */
void __PackedArray_pack_11(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see in == end and
     would run through every remaining case, reading past `in` and
     overwriting words of `out`. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)11) / 32;
  startBit = ((uint64_t)offset * (uint64_t)11) % 32;
  /* keep the bits of the first word that belong to earlier items */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Fast path: finish the current group of 32 items, then whole groups.
       The split is done in 64-bit unsigned arithmetic so that neither
       count + offset nor 32 * n can overflow for very large counts. */
    const uint64_t total = (uint64_t)count + offset;
    uint32_t n = (uint32_t)(total / 32); /* loop passes, always >= 1 here */
    count = (uint32_t)(total % 32);      /* items left for the tail */

    /* Duff's device: jump into the unrolled loop at the starting item;
       every case falls through to the next one on purpose. */
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 11) % 32);
      case 1:
        packed |= *in++ << ((1 * 11) % 32);
      case 2:
        /* item straddles two words: flush this word, carry the high bits */
        packed |= *in << ((2 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((2 * 11) % 32));
      case 3:
        packed |= *in++ << ((3 * 11) % 32);
      case 4:
        packed |= *in++ << ((4 * 11) % 32);
      case 5:
        packed |= *in << ((5 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((5 * 11) % 32));
      case 6:
        packed |= *in++ << ((6 * 11) % 32);
      case 7:
        packed |= *in++ << ((7 * 11) % 32);
      case 8:
        packed |= *in << ((8 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((8 * 11) % 32));
      case 9:
        packed |= *in++ << ((9 * 11) % 32);
      case 10:
        packed |= *in++ << ((10 * 11) % 32);
      case 11:
        packed |= *in << ((11 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((11 * 11) % 32));
      case 12:
        packed |= *in++ << ((12 * 11) % 32);
      case 13:
        packed |= *in++ << ((13 * 11) % 32);
      case 14:
        packed |= *in << ((14 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((14 * 11) % 32));
      case 15:
        packed |= *in++ << ((15 * 11) % 32);
      case 16:
        packed |= *in++ << ((16 * 11) % 32);
      case 17:
        packed |= *in << ((17 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((17 * 11) % 32));
      case 18:
        packed |= *in++ << ((18 * 11) % 32);
      case 19:
        packed |= *in++ << ((19 * 11) % 32);
      case 20:
        packed |= *in << ((20 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((20 * 11) % 32));
      case 21:
        packed |= *in++ << ((21 * 11) % 32);
      case 22:
        packed |= *in++ << ((22 * 11) % 32);
      case 23:
        packed |= *in << ((23 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((23 * 11) % 32));
      case 24:
        packed |= *in++ << ((24 * 11) % 32);
      case 25:
        packed |= *in++ << ((25 * 11) % 32);
      case 26:
        packed |= *in << ((26 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((26 * 11) % 32));
      case 27:
        packed |= *in++ << ((27 * 11) % 32);
      case 28:
        packed |= *in++ << ((28 * 11) % 32);
      case 29:
        packed |= *in << ((29 * 11) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((29 * 11) % 32));
      case 30:
        packed |= *in++ << ((30 * 11) % 32);
      case 31:
        /* item 31 ends exactly on a word boundary: flush and start clean */
        packed |= *in++ << ((31 * 11) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the tail starts on a fresh word, at item 0 of a new group */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than 32 items remain. Same unrolled sequence, but stop as
     soon as the last input value has been consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 11) % 32);
      if (in == end) break;
    case 1:
      packed |= *in++ << ((1 * 11) % 32);
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 11) % 32));
      if (in == end) break;
    case 3:
      packed |= *in++ << ((3 * 11) % 32);
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 11) % 32);
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 11) % 32));
      if (in == end) break;
    case 6:
      packed |= *in++ << ((6 * 11) % 32);
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 11) % 32);
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 11) % 32));
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 11) % 32);
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 11) % 32);
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 11) % 32));
      if (in == end) break;
    case 12:
      packed |= *in++ << ((12 * 11) % 32);
      if (in == end) break;
    case 13:
      packed |= *in++ << ((13 * 11) % 32);
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 11) % 32));
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 11) % 32);
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 11) % 32);
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 11) % 32));
      if (in == end) break;
    case 18:
      packed |= *in++ << ((18 * 11) % 32);
      if (in == end) break;
    case 19:
      packed |= *in++ << ((19 * 11) % 32);
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 11) % 32));
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 11) % 32);
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 11) % 32);
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 11) % 32));
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 11) % 32);
      if (in == end) break;
    case 25:
      packed |= *in++ << ((25 * 11) % 32);
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 11) % 32));
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 11) % 32);
      if (in == end) break;
    case 28:
      packed |= *in++ << ((28 * 11) % 32);
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 11) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 11) % 32));
      if (in == end) break;
    case 30:
      packed |= *in++ << ((30 * 11) % 32);
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 11) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);

  /* If the last item did not end on a word boundary, the final word is only
     partially filled: merge in the existing bits above the last written bit
     so that following items are left untouched. */
  if ((count * 11 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)11 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/**
 * Unpacks `count` consecutive 11-bit items from the packed words at `in`,
 * starting at item index `offset`, writing one item per element of `out`.
 *
 * Item i lives at bit position i * 11 of the word stream; 32 items span
 * exactly 11 words, so the per-item shift pattern repeats every 32 items and
 * the switch below can be indexed with `offset % 32`.
 *
 * @param in     packed source words (read only)
 * @param offset index of the first item to unpack
 * @param out    destination, must have room for `count` elements
 * @param count  number of items to unpack; 0 is a no-op
 */
void __PackedArray_unpack_11(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see out == end and
     would run through every remaining case, writing past `out`. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)11) / 32;
  packed = *in;
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Fast path: finish the current group of 32 items, then whole groups.
       The split is done in 64-bit unsigned arithmetic so that neither
       count + offset nor 32 * n can overflow for very large counts. */
    const uint64_t total = (uint64_t)count + offset;
    uint32_t n = (uint32_t)(total / 32); /* loop passes, always >= 1 here */
    count = (uint32_t)(total % 32);      /* items left for the tail */

    /* Duff's device: jump into the unrolled loop at the starting item;
       every case falls through to the next one on purpose. */
    switch (offset)
    {
      do
      {
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 1:
        *out++ = (packed >> ((1 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 2:
      {
        /* item straddles two words: low bits from this word, rest from next */
        uint32_t low, high;
        low = packed >> ((2 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((2 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((2 * 11) % 32)) << (32 - ((2 * 11) % 32))));
      }
      case 3:
        *out++ = (packed >> ((3 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 4:
        *out++ = (packed >> ((4 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 5:
      {
        uint32_t low, high;
        low = packed >> ((5 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((5 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((5 * 11) % 32)) << (32 - ((5 * 11) % 32))));
      }
      case 6:
        *out++ = (packed >> ((6 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 7:
        *out++ = (packed >> ((7 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 8:
      {
        uint32_t low, high;
        low = packed >> ((8 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((8 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((8 * 11) % 32)) << (32 - ((8 * 11) % 32))));
      }
      case 9:
        *out++ = (packed >> ((9 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 10:
        *out++ = (packed >> ((10 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 11:
      {
        uint32_t low, high;
        low = packed >> ((11 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((11 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((11 * 11) % 32)) << (32 - ((11 * 11) % 32))));
      }
      case 12:
        *out++ = (packed >> ((12 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 13:
        *out++ = (packed >> ((13 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 14:
      {
        uint32_t low, high;
        low = packed >> ((14 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((14 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((14 * 11) % 32)) << (32 - ((14 * 11) % 32))));
      }
      case 15:
        *out++ = (packed >> ((15 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 16:
        *out++ = (packed >> ((16 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 17:
      {
        uint32_t low, high;
        low = packed >> ((17 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((17 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((17 * 11) % 32)) << (32 - ((17 * 11) % 32))));
      }
      case 18:
        *out++ = (packed >> ((18 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 19:
        *out++ = (packed >> ((19 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 20:
      {
        uint32_t low, high;
        low = packed >> ((20 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((20 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((20 * 11) % 32)) << (32 - ((20 * 11) % 32))));
      }
      case 21:
        *out++ = (packed >> ((21 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 22:
        *out++ = (packed >> ((22 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 23:
      {
        uint32_t low, high;
        low = packed >> ((23 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((23 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((23 * 11) % 32)) << (32 - ((23 * 11) % 32))));
      }
      case 24:
        *out++ = (packed >> ((24 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 25:
        *out++ = (packed >> ((25 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 26:
      {
        uint32_t low, high;
        low = packed >> ((26 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((26 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((26 * 11) % 32)) << (32 - ((26 * 11) % 32))));
      }
      case 27:
        *out++ = (packed >> ((27 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 28:
        *out++ = (packed >> ((28 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 29:
      {
        uint32_t low, high;
        low = packed >> ((29 * 11) % 32);
        packed = *++in;
        high = packed << (32 - ((29 * 11) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((29 * 11) % 32)) << (32 - ((29 * 11) % 32))));
      }
      case 30:
        *out++ = (packed >> ((30 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      case 31:
        *out++ = (packed >> ((31 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* The last unrolled pass ended on a word boundary: start the tail on the
       next word, at item 0 of a fresh group. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than 32 items remain. Same unrolled sequence, but stop as
     soon as the requested number of items has been written. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 1:
      *out++ = (packed >> ((1 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((2 * 11) % 32)) << (32 - ((2 * 11) % 32))));
    }
      if (out == end) break;
    case 3:
      *out++ = (packed >> ((3 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((5 * 11) % 32)) << (32 - ((5 * 11) % 32))));
    }
      if (out == end) break;
    case 6:
      *out++ = (packed >> ((6 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((8 * 11) % 32)) << (32 - ((8 * 11) % 32))));
    }
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((11 * 11) % 32)) << (32 - ((11 * 11) % 32))));
    }
      if (out == end) break;
    case 12:
      *out++ = (packed >> ((12 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((14 * 11) % 32)) << (32 - ((14 * 11) % 32))));
    }
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 16:
      *out++ = (packed >> ((16 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((17 * 11) % 32)) << (32 - ((17 * 11) % 32))));
    }
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 19:
      *out++ = (packed >> ((19 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((20 * 11) % 32)) << (32 - ((20 * 11) % 32))));
    }
      if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((23 * 11) % 32)) << (32 - ((23 * 11) % 32))));
    }
      if (out == end) break;
    case 24:
      *out++ = (packed >> ((24 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 25:
      *out++ = (packed >> ((25 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((26 * 11) % 32)) << (32 - ((26 * 11) % 32))));
    }
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 28:
      *out++ = (packed >> ((28 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 11) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 11) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 11) - 1) >> (32 - ((29 * 11) % 32)) << (32 - ((29 * 11) % 32))));
    }
      if (out == end) break;
    case 30:
      *out++ = (packed >> ((30 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 11) % 32)) & (uint32_t)((1ULL << 11) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/**
 * Packs `count` values from `in` as consecutive 12-bit items into the word
 * stream at `out`, starting at item index `offset`.
 *
 * Item i lives at bit position i * 12; 32 items span exactly 12 words, so the
 * per-item shift pattern repeats every 32 items and the switch below can be
 * indexed with `offset % 32`. Bits of `out` that lie before the first written
 * item or after the last one are preserved.
 *
 * Values are OR-ed in without masking, so a value wider than 12 bits spills
 * into the neighboring items.
 *
 * @param out    packed destination words
 * @param offset index of the first item to write
 * @param in     source values, one per element
 * @param count  number of items to pack; 0 is a no-op
 */
void __PackedArray_pack_12(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below would never see in == end and
     would run through every remaining case, reading past `in` and
     overwriting words of `out`. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)12) / 32;
  startBit = ((uint64_t)offset * (uint64_t)12) % 32;
  /* keep the bits of the first word that belong to earlier items */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Fast path: finish the current group of 32 items, then whole groups.
       The split is done in 64-bit unsigned arithmetic so that neither
       count + offset nor 32 * n can overflow for very large counts. */
    const uint64_t total = (uint64_t)count + offset;
    uint32_t n = (uint32_t)(total / 32); /* loop passes, always >= 1 here */
    count = (uint32_t)(total % 32);      /* items left for the tail */

    /* Duff's device: jump into the unrolled loop at the starting item;
       every case falls through to the next one on purpose. */
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 12) % 32);
      case 1:
        packed |= *in++ << ((1 * 12) % 32);
      case 2:
        /* item straddles two words: flush this word, carry the high bits */
        packed |= *in << ((2 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((2 * 12) % 32));
      case 3:
        packed |= *in++ << ((3 * 12) % 32);
      case 4:
        packed |= *in++ << ((4 * 12) % 32);
      case 5:
        packed |= *in << ((5 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((5 * 12) % 32));
      case 6:
        packed |= *in++ << ((6 * 12) % 32);
      case 7:
        /* item ends exactly on a word boundary: flush and start clean */
        packed |= *in++ << ((7 * 12) % 32);
        *out++ = packed;
        packed = 0;
      case 8:
        packed |= *in++ << ((8 * 12) % 32);
      case 9:
        packed |= *in++ << ((9 * 12) % 32);
      case 10:
        packed |= *in << ((10 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((10 * 12) % 32));
      case 11:
        packed |= *in++ << ((11 * 12) % 32);
      case 12:
        packed |= *in++ << ((12 * 12) % 32);
      case 13:
        packed |= *in << ((13 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((13 * 12) % 32));
      case 14:
        packed |= *in++ << ((14 * 12) % 32);
      case 15:
        packed |= *in++ << ((15 * 12) % 32);
        *out++ = packed;
        packed = 0;
      case 16:
        packed |= *in++ << ((16 * 12) % 32);
      case 17:
        packed |= *in++ << ((17 * 12) % 32);
      case 18:
        packed |= *in << ((18 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((18 * 12) % 32));
      case 19:
        packed |= *in++ << ((19 * 12) % 32);
      case 20:
        packed |= *in++ << ((20 * 12) % 32);
      case 21:
        packed |= *in << ((21 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((21 * 12) % 32));
      case 22:
        packed |= *in++ << ((22 * 12) % 32);
      case 23:
        packed |= *in++ << ((23 * 12) % 32);
        *out++ = packed;
        packed = 0;
      case 24:
        packed |= *in++ << ((24 * 12) % 32);
      case 25:
        packed |= *in++ << ((25 * 12) % 32);
      case 26:
        packed |= *in << ((26 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((26 * 12) % 32));
      case 27:
        packed |= *in++ << ((27 * 12) % 32);
      case 28:
        packed |= *in++ << ((28 * 12) % 32);
      case 29:
        packed |= *in << ((29 * 12) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((29 * 12) % 32));
      case 30:
        packed |= *in++ << ((30 * 12) % 32);
      case 31:
        packed |= *in++ << ((31 * 12) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }

    if (count == 0)
      return;

    /* the tail starts on a fresh word, at item 0 of a new group */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than 32 items remain. Same unrolled sequence, but stop as
     soon as the last input value has been consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 12) % 32);
      if (in == end) break;
    case 1:
      packed |= *in++ << ((1 * 12) % 32);
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 12) % 32));
      if (in == end) break;
    case 3:
      packed |= *in++ << ((3 * 12) % 32);
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 12) % 32);
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 12) % 32));
      if (in == end) break;
    case 6:
      packed |= *in++ << ((6 * 12) % 32);
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 12) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 8:
      packed |= *in++ << ((8 * 12) % 32);
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 12) % 32);
      if (in == end) break;
    case 10:
      packed |= *in << ((10 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((10 * 12) % 32));
      if (in == end) break;
    case 11:
      packed |= *in++ << ((11 * 12) % 32);
      if (in == end) break;
    case 12:
      packed |= *in++ << ((12 * 12) % 32);
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 12) % 32));
      if (in == end) break;
    case 14:
      packed |= *in++ << ((14 * 12) % 32);
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 12) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 12) % 32);
      if (in == end) break;
    case 17:
      packed |= *in++ << ((17 * 12) % 32);
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 12) % 32));
      if (in == end) break;
    case 19:
      packed |= *in++ << ((19 * 12) % 32);
      if (in == end) break;
    case 20:
      packed |= *in++ << ((20 * 12) % 32);
      if (in == end) break;
    case 21:
      packed |= *in << ((21 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((21 * 12) % 32));
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 12) % 32);
      if (in == end) break;
    case 23:
      packed |= *in++ << ((23 * 12) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 12) % 32);
      if (in == end) break;
    case 25:
      packed |= *in++ << ((25 * 12) % 32);
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 12) % 32));
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 12) % 32);
      if (in == end) break;
    case 28:
      packed |= *in++ << ((28 * 12) % 32);
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 12) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 12) % 32));
      if (in == end) break;
    case 30:
      packed |= *in++ << ((30 * 12) % 32);
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 12) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);

  /* If the last item did not end on a word boundary, the final word is only
     partially filled: merge in the existing bits above the last written bit
     so that following items are left untouched. */
  if ((count * 12 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)12 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/**
 * Unpacks `count` consecutive 12-bit items from a packed stream of 32-bit words.
 *
 * Items are stored back to back, least-significant bit first: item i occupies
 * stream bits [i * 12, i * 12 + 12), so an item may straddle two consecutive
 * words. Only the words that hold requested bits are read.
 *
 * @param in     first word of the packed stream (item 0 starts at its bit 0)
 * @param offset index of the first item to extract
 * @param out    receives `count` values, one item per element
 * @param count  number of items to extract; 0 is a no-op that touches no memory
 */
void __PackedArray_unpack_12(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t bits = 12u;
  const uint32_t wordBits = 32u;
  const uint32_t mask = (uint32_t)((1ULL << bits) - 1);
  const uint32_t* __restrict end;
  uint32_t word;
  uint32_t bit; /* position of the next item inside `word`, 0..32 */

  /* nothing requested: return before dereferencing either buffer */
  if (count == 0)
    return;

  /* 64-bit products so that offset * bits cannot wrap */
  in += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);
  word = *in;

  for (end = out + count; out != end; ++out)
  {
    if (bit == wordBits)
    {
      /* the previous item ended exactly on a word boundary; the next word is
         fetched lazily so the word after the last requested item is never read */
      word = *++in;
      bit = 0;
    }

    if (bit + bits <= wordBits)
    {
      /* item lies entirely inside the current word */
      *out = (word >> bit) & mask;
      bit += bits;
    }
    else
    {
      /* item straddles two words: low part from this word, high part from the next */
      const uint32_t low = word >> bit;
      word = *++in;
      *out = (low | (word << (wordBits - bit))) & mask;
      bit = bit + bits - wordBits;
    }
  }
}
/**
 * Packs `count` values as consecutive 13-bit items into a stream of 32-bit words.
 *
 * Items are stored back to back, least-significant bit first: item i occupies
 * stream bits [i * 13, i * 13 + 13), so an item may straddle two consecutive
 * words. Stream bits below the first written item and above the last written
 * item are preserved.
 *
 * Input values are not masked: each one is expected to fit in 13 bits.
 *
 * @param out    first word of the packed stream (item 0 starts at its bit 0)
 * @param offset index of the first item to overwrite
 * @param in     `count` values to store
 * @param count  number of items to store; 0 is a no-op that touches no memory
 */
void __PackedArray_pack_13(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bits = 13u;
  const uint32_t wordBits = 32u;
  const uint32_t* __restrict end;
  uint32_t packed; /* word currently being assembled */
  uint32_t bit;    /* position of the next item inside `packed`, 0..31 */

  /* nothing to store: return before dereferencing either buffer */
  if (count == 0)
    return;

  /* 64-bit products so that offset * bits cannot wrap */
  out += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);

  /* start from the existing bits that sit below the first item */
  packed = *out & (uint32_t)((1ULL << bit) - 1);

  for (end = in + count; in != end; ++in)
  {
    const uint32_t value = *in;

    packed |= value << bit;
    bit += bits;

    if (bit >= wordBits)
    {
      /* the current word is complete: flush it and carry over whatever part
         of this item spilled into the next word (nothing if it ended exactly
         on the boundary) */
      *out++ = packed;
      bit -= wordBits;
      packed = bit ? value >> (bits - bit) : 0;
    }
  }

  if (bit != 0)
  {
    /* last word is only partially filled: keep the existing bits above it */
    packed |= *out & ~(uint32_t)((1ULL << bit) - 1);
    *out = packed;
  }
}
/**
 * Unpacks `count` consecutive 13-bit items from a packed stream of 32-bit words.
 *
 * Items are stored back to back, least-significant bit first: item i occupies
 * stream bits [i * 13, i * 13 + 13), so an item may straddle two consecutive
 * words. Only the words that hold requested bits are read.
 *
 * @param in     first word of the packed stream (item 0 starts at its bit 0)
 * @param offset index of the first item to extract
 * @param out    receives `count` values, one item per element
 * @param count  number of items to extract; 0 is a no-op that touches no memory
 */
void __PackedArray_unpack_13(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t bits = 13u;
  const uint32_t wordBits = 32u;
  const uint32_t mask = (uint32_t)((1ULL << bits) - 1);
  const uint32_t* __restrict end;
  uint32_t word;
  uint32_t bit; /* position of the next item inside `word`, 0..32 */

  /* nothing requested: return before dereferencing either buffer */
  if (count == 0)
    return;

  /* 64-bit products so that offset * bits cannot wrap */
  in += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);
  word = *in;

  for (end = out + count; out != end; ++out)
  {
    if (bit == wordBits)
    {
      /* the previous item ended exactly on a word boundary; the next word is
         fetched lazily so the word after the last requested item is never read */
      word = *++in;
      bit = 0;
    }

    if (bit + bits <= wordBits)
    {
      /* item lies entirely inside the current word */
      *out = (word >> bit) & mask;
      bit += bits;
    }
    else
    {
      /* item straddles two words: low part from this word, high part from the next */
      const uint32_t low = word >> bit;
      word = *++in;
      *out = (low | (word << (wordBits - bit))) & mask;
      bit = bit + bits - wordBits;
    }
  }
}
/**
 * Packs `count` values as consecutive 14-bit items into a stream of 32-bit words.
 *
 * Items are stored back to back, least-significant bit first: item i occupies
 * stream bits [i * 14, i * 14 + 14), so an item may straddle two consecutive
 * words. Stream bits below the first written item and above the last written
 * item are preserved.
 *
 * Input values are not masked: each one is expected to fit in 14 bits.
 *
 * @param out    first word of the packed stream (item 0 starts at its bit 0)
 * @param offset index of the first item to overwrite
 * @param in     `count` values to store
 * @param count  number of items to store; 0 is a no-op that touches no memory
 */
void __PackedArray_pack_14(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bits = 14u;
  const uint32_t wordBits = 32u;
  const uint32_t* __restrict end;
  uint32_t packed; /* word currently being assembled */
  uint32_t bit;    /* position of the next item inside `packed`, 0..31 */

  /* nothing to store: return before dereferencing either buffer */
  if (count == 0)
    return;

  /* 64-bit products so that offset * bits cannot wrap */
  out += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);

  /* start from the existing bits that sit below the first item */
  packed = *out & (uint32_t)((1ULL << bit) - 1);

  for (end = in + count; in != end; ++in)
  {
    const uint32_t value = *in;

    packed |= value << bit;
    bit += bits;

    if (bit >= wordBits)
    {
      /* the current word is complete: flush it and carry over whatever part
         of this item spilled into the next word (nothing if it ended exactly
         on the boundary) */
      *out++ = packed;
      bit -= wordBits;
      packed = bit ? value >> (bits - bit) : 0;
    }
  }

  if (bit != 0)
  {
    /* last word is only partially filled: keep the existing bits above it */
    packed |= *out & ~(uint32_t)((1ULL << bit) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 14-bit items from the packed word stream `in` into `out`,
 * one item per uint32_t, starting at item index `offset`.
 *
 * Layout handled by this routine: items are stored back to back, least
 * significant bits first, inside 32-bit words. 32 items occupy exactly 14
 * words, so the position of an item inside its word only depends on
 * `offset % 32`; that value selects the entry point of the unrolled code.
 *
 * in     - packed words; advanced internally to the word holding `offset`.
 * offset - index (in items) of the first item to unpack.
 * out    - destination, must have room for `count` items.
 * count  - number of items to unpack; 0 is a no-op.
 *
 * Generated code: every `case` label below falls through on purpose.
 */
void __PackedArray_unpack_14(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to do; without this guard the tail switch below would emit at
     least one item and never meet `end`. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)14) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current 32-item group: run the
       unrolled loop, entering it in the middle at `offset`. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* Start of a new 32-item group: fetch its first word. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 1:
        *out++ = (packed >> ((1 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 2:
        {
          /* Item straddles two words: merge the bits left in the current
             word with the low bits of the next one. */
          uint32_t low, high;
          low = packed >> ((2 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((2 * 14) % 32)) << (32 - ((2 * 14) % 32))));
        }
      case 3:
        *out++ = (packed >> ((3 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 4:
        {
          uint32_t low, high;
          low = packed >> ((4 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((4 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((4 * 14) % 32)) << (32 - ((4 * 14) % 32))));
        }
      case 5:
        *out++ = (packed >> ((5 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 6:
        {
          uint32_t low, high;
          low = packed >> ((6 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((6 * 14) % 32)) << (32 - ((6 * 14) % 32))));
        }
      case 7:
        *out++ = (packed >> ((7 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 8:
        *out++ = (packed >> ((8 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 9:
        {
          uint32_t low, high;
          low = packed >> ((9 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((9 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((9 * 14) % 32)) << (32 - ((9 * 14) % 32))));
        }
      case 10:
        *out++ = (packed >> ((10 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 11:
        {
          uint32_t low, high;
          low = packed >> ((11 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((11 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((11 * 14) % 32)) << (32 - ((11 * 14) % 32))));
        }
      case 12:
        *out++ = (packed >> ((12 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 13:
        {
          uint32_t low, high;
          low = packed >> ((13 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((13 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((13 * 14) % 32)) << (32 - ((13 * 14) % 32))));
        }
      case 14:
        *out++ = (packed >> ((14 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 15:
        *out++ = (packed >> ((15 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
        /* Item 15 ends exactly on a word boundary: load the next word. */
        packed = *++in;
      case 16:
        *out++ = (packed >> ((16 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 17:
        *out++ = (packed >> ((17 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 18:
        {
          uint32_t low, high;
          low = packed >> ((18 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((18 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((18 * 14) % 32)) << (32 - ((18 * 14) % 32))));
        }
      case 19:
        *out++ = (packed >> ((19 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 20:
        {
          uint32_t low, high;
          low = packed >> ((20 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((20 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((20 * 14) % 32)) << (32 - ((20 * 14) % 32))));
        }
      case 21:
        *out++ = (packed >> ((21 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 22:
        {
          uint32_t low, high;
          low = packed >> ((22 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((22 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((22 * 14) % 32)) << (32 - ((22 * 14) % 32))));
        }
      case 23:
        *out++ = (packed >> ((23 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 24:
        *out++ = (packed >> ((24 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((25 * 14) % 32)) << (32 - ((25 * 14) % 32))));
        }
      case 26:
        *out++ = (packed >> ((26 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 27:
        {
          uint32_t low, high;
          low = packed >> ((27 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((27 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((27 * 14) % 32)) << (32 - ((27 * 14) % 32))));
        }
      case 28:
        *out++ = (packed >> ((28 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 14) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 14) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((29 * 14) % 32)) << (32 - ((29 * 14) % 32))));
        }
      case 30:
        *out++ = (packed >> ((30 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      case 31:
        *out++ = (packed >> ((31 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* Remaining items start a fresh group: load its first word. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than a full group left; stop as soon as `end` is reached. */
  end = out + count;
  switch (offset)
  {
  case 0:
    *out++ = (packed >> ((0 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 1:
    *out++ = (packed >> ((1 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((2 * 14) % 32)) << (32 - ((2 * 14) % 32))));
    }
    if (out == end) break;
  case 3:
    *out++ = (packed >> ((3 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((4 * 14) % 32)) << (32 - ((4 * 14) % 32))));
    }
    if (out == end) break;
  case 5:
    *out++ = (packed >> ((5 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((6 * 14) % 32)) << (32 - ((6 * 14) % 32))));
    }
    if (out == end) break;
  case 7:
    *out++ = (packed >> ((7 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 8:
    *out++ = (packed >> ((8 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 9:
    {
      uint32_t low, high;
      low = packed >> ((9 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((9 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((9 * 14) % 32)) << (32 - ((9 * 14) % 32))));
    }
    if (out == end) break;
  case 10:
    *out++ = (packed >> ((10 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((11 * 14) % 32)) << (32 - ((11 * 14) % 32))));
    }
    if (out == end) break;
  case 12:
    *out++ = (packed >> ((12 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 13:
    {
      uint32_t low, high;
      low = packed >> ((13 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((13 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((13 * 14) % 32)) << (32 - ((13 * 14) % 32))));
    }
    if (out == end) break;
  case 14:
    *out++ = (packed >> ((14 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 15:
    *out++ = (packed >> ((15 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
    /* Only fetch the next word when another item is actually needed. */
    packed = *++in;
  case 16:
    *out++ = (packed >> ((16 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 17:
    *out++ = (packed >> ((17 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 18:
    {
      uint32_t low, high;
      low = packed >> ((18 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((18 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((18 * 14) % 32)) << (32 - ((18 * 14) % 32))));
    }
    if (out == end) break;
  case 19:
    *out++ = (packed >> ((19 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((20 * 14) % 32)) << (32 - ((20 * 14) % 32))));
    }
    if (out == end) break;
  case 21:
    *out++ = (packed >> ((21 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 22:
    {
      uint32_t low, high;
      low = packed >> ((22 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((22 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((22 * 14) % 32)) << (32 - ((22 * 14) % 32))));
    }
    if (out == end) break;
  case 23:
    *out++ = (packed >> ((23 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 24:
    *out++ = (packed >> ((24 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((25 * 14) % 32)) << (32 - ((25 * 14) % 32))));
    }
    if (out == end) break;
  case 26:
    *out++ = (packed >> ((26 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((27 * 14) % 32)) << (32 - ((27 * 14) % 32))));
    }
    if (out == end) break;
  case 28:
    *out++ = (packed >> ((28 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 14) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 14) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 14) - 1) >> (32 - ((29 * 14) % 32)) << (32 - ((29 * 14) % 32))));
    }
    if (out == end) break;
  case 30:
    *out++ = (packed >> ((30 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  case 31:
    *out++ = (packed >> ((31 * 14) % 32)) & (uint32_t)((1ULL << 14) - 1);
    if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` items from `in` (one item per uint32_t) as consecutive
 * 15-bit fields into the word stream `out`, starting at item index `offset`.
 *
 * Layout produced by this routine: items are stored back to back, least
 * significant bits first, inside 32-bit words. 32 items occupy exactly 15
 * words, so the position of an item inside its word only depends on
 * `offset % 32`; that value selects the entry point of the unrolled code.
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved. Input values are OR-ed in without masking, so each
 * one is expected to fit in 15 bits.
 *
 * out    - packed words; advanced internally to the word holding `offset`.
 * offset - index (in items) of the first item to write.
 * in     - source items.
 * count  - number of items to pack; 0 is a no-op.
 *
 * Generated code: every `case` label below falls through on purpose.
 */
void __PackedArray_pack_15(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to do; without this guard the tail switch below would consume
     at least one item and never meet `end`. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)15) / 32;
  startBit = ((uint64_t)offset * (uint64_t)15) % 32;
  /* Keep the bits of the first word that precede the first written item. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current 32-item group: run the
       unrolled loop, entering it in the middle at `offset`. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 15) % 32);
      case 1:
        packed |= *in++ << ((1 * 15) % 32);
      case 2:
        /* Item straddles two words: flush the current word, then start the
           next one with the item's remaining high bits. */
        packed |= *in << ((2 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((2 * 15) % 32));
      case 3:
        packed |= *in++ << ((3 * 15) % 32);
      case 4:
        packed |= *in << ((4 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((4 * 15) % 32));
      case 5:
        packed |= *in++ << ((5 * 15) % 32);
      case 6:
        packed |= *in << ((6 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((6 * 15) % 32));
      case 7:
        packed |= *in++ << ((7 * 15) % 32);
      case 8:
        packed |= *in << ((8 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((8 * 15) % 32));
      case 9:
        packed |= *in++ << ((9 * 15) % 32);
      case 10:
        packed |= *in << ((10 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((10 * 15) % 32));
      case 11:
        packed |= *in++ << ((11 * 15) % 32);
      case 12:
        packed |= *in << ((12 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((12 * 15) % 32));
      case 13:
        packed |= *in++ << ((13 * 15) % 32);
      case 14:
        packed |= *in << ((14 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((14 * 15) % 32));
      case 15:
        packed |= *in++ << ((15 * 15) % 32);
      case 16:
        packed |= *in++ << ((16 * 15) % 32);
      case 17:
        packed |= *in << ((17 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((17 * 15) % 32));
      case 18:
        packed |= *in++ << ((18 * 15) % 32);
      case 19:
        packed |= *in << ((19 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((19 * 15) % 32));
      case 20:
        packed |= *in++ << ((20 * 15) % 32);
      case 21:
        packed |= *in << ((21 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((21 * 15) % 32));
      case 22:
        packed |= *in++ << ((22 * 15) % 32);
      case 23:
        packed |= *in << ((23 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((23 * 15) % 32));
      case 24:
        packed |= *in++ << ((24 * 15) % 32);
      case 25:
        packed |= *in << ((25 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((25 * 15) % 32));
      case 26:
        packed |= *in++ << ((26 * 15) % 32);
      case 27:
        packed |= *in << ((27 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((27 * 15) % 32));
      case 28:
        packed |= *in++ << ((28 * 15) % 32);
      case 29:
        packed |= *in << ((29 * 15) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((29 * 15) % 32));
      case 30:
        packed |= *in++ << ((30 * 15) % 32);
      case 31:
        /* Item 31 ends exactly on a word boundary: flush and start clean. */
        packed |= *in++ << ((31 * 15) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* Remaining items start a fresh group on a word boundary. */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer than a full group left; stop as soon as `end` is reached. */
  end = in + count;
  switch (offset)
  {
  case 0:
    packed |= *in++ << ((0 * 15) % 32);
    if (in == end) break;
  case 1:
    packed |= *in++ << ((1 * 15) % 32);
    if (in == end) break;
  case 2:
    packed |= *in << ((2 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((2 * 15) % 32));
    if (in == end) break;
  case 3:
    packed |= *in++ << ((3 * 15) % 32);
    if (in == end) break;
  case 4:
    packed |= *in << ((4 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((4 * 15) % 32));
    if (in == end) break;
  case 5:
    packed |= *in++ << ((5 * 15) % 32);
    if (in == end) break;
  case 6:
    packed |= *in << ((6 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((6 * 15) % 32));
    if (in == end) break;
  case 7:
    packed |= *in++ << ((7 * 15) % 32);
    if (in == end) break;
  case 8:
    packed |= *in << ((8 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((8 * 15) % 32));
    if (in == end) break;
  case 9:
    packed |= *in++ << ((9 * 15) % 32);
    if (in == end) break;
  case 10:
    packed |= *in << ((10 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((10 * 15) % 32));
    if (in == end) break;
  case 11:
    packed |= *in++ << ((11 * 15) % 32);
    if (in == end) break;
  case 12:
    packed |= *in << ((12 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((12 * 15) % 32));
    if (in == end) break;
  case 13:
    packed |= *in++ << ((13 * 15) % 32);
    if (in == end) break;
  case 14:
    packed |= *in << ((14 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((14 * 15) % 32));
    if (in == end) break;
  case 15:
    packed |= *in++ << ((15 * 15) % 32);
    if (in == end) break;
  case 16:
    packed |= *in++ << ((16 * 15) % 32);
    if (in == end) break;
  case 17:
    packed |= *in << ((17 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((17 * 15) % 32));
    if (in == end) break;
  case 18:
    packed |= *in++ << ((18 * 15) % 32);
    if (in == end) break;
  case 19:
    packed |= *in << ((19 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((19 * 15) % 32));
    if (in == end) break;
  case 20:
    packed |= *in++ << ((20 * 15) % 32);
    if (in == end) break;
  case 21:
    packed |= *in << ((21 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((21 * 15) % 32));
    if (in == end) break;
  case 22:
    packed |= *in++ << ((22 * 15) % 32);
    if (in == end) break;
  case 23:
    packed |= *in << ((23 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((23 * 15) % 32));
    if (in == end) break;
  case 24:
    packed |= *in++ << ((24 * 15) % 32);
    if (in == end) break;
  case 25:
    packed |= *in << ((25 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((25 * 15) % 32));
    if (in == end) break;
  case 26:
    packed |= *in++ << ((26 * 15) % 32);
    if (in == end) break;
  case 27:
    packed |= *in << ((27 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((27 * 15) % 32));
    if (in == end) break;
  case 28:
    packed |= *in++ << ((28 * 15) % 32);
    if (in == end) break;
  case 29:
    packed |= *in << ((29 * 15) % 32);
    *out++ = packed;
    packed = *in++ >> (32 - ((29 * 15) % 32));
    if (in == end) break;
  case 30:
    packed |= *in++ << ((30 * 15) % 32);
    if (in == end) break;
  case 31:
    packed |= *in++ << ((31 * 15) % 32);
    *out++ = packed;
    packed = 0;
    if (in == end) break;
  }
  assert(in == end);

  /* A partially filled last word is still pending: merge it with the bits
     already stored above the last written item, then flush it. */
  if ((count * 15 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)15 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 15-bit items from the packed word stream `in` into `out`,
 * one item per uint32_t, starting at item index `offset`.
 *
 * Layout handled by this routine: items are stored back to back, least
 * significant bits first, inside 32-bit words. 32 items occupy exactly 15
 * words, so the position of an item inside its word only depends on
 * `offset % 32`; that value selects the entry point of the unrolled code.
 *
 * in     - packed words; advanced internally to the word holding `offset`.
 * offset - index (in items) of the first item to unpack.
 * out    - destination, must have room for `count` items.
 * count  - number of items to unpack; 0 is a no-op.
 *
 * Generated code: every `case` label below falls through on purpose.
 */
void __PackedArray_unpack_15(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to do; without this guard the tail switch below would emit at
     least one item and never meet `end`. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)15) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current 32-item group: run the
       unrolled loop, entering it in the middle at `offset`. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* Start of a new 32-item group: fetch its first word. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 1:
        *out++ = (packed >> ((1 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 2:
        {
          /* Item straddles two words: merge the bits left in the current
             word with the low bits of the next one. */
          uint32_t low, high;
          low = packed >> ((2 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((2 * 15) % 32)) << (32 - ((2 * 15) % 32))));
        }
      case 3:
        *out++ = (packed >> ((3 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 4:
        {
          uint32_t low, high;
          low = packed >> ((4 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((4 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((4 * 15) % 32)) << (32 - ((4 * 15) % 32))));
        }
      case 5:
        *out++ = (packed >> ((5 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 6:
        {
          uint32_t low, high;
          low = packed >> ((6 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((6 * 15) % 32)) << (32 - ((6 * 15) % 32))));
        }
      case 7:
        *out++ = (packed >> ((7 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((8 * 15) % 32)) << (32 - ((8 * 15) % 32))));
        }
      case 9:
        *out++ = (packed >> ((9 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 10:
        {
          uint32_t low, high;
          low = packed >> ((10 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((10 * 15) % 32)) << (32 - ((10 * 15) % 32))));
        }
      case 11:
        *out++ = (packed >> ((11 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((12 * 15) % 32)) << (32 - ((12 * 15) % 32))));
        }
      case 13:
        *out++ = (packed >> ((13 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((14 * 15) % 32)) << (32 - ((14 * 15) % 32))));
        }
      case 15:
        *out++ = (packed >> ((15 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 16:
        *out++ = (packed >> ((16 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((17 * 15) % 32)) << (32 - ((17 * 15) % 32))));
        }
      case 18:
        *out++ = (packed >> ((18 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((19 * 15) % 32)) << (32 - ((19 * 15) % 32))));
        }
      case 20:
        *out++ = (packed >> ((20 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((21 * 15) % 32)) << (32 - ((21 * 15) % 32))));
        }
      case 22:
        *out++ = (packed >> ((22 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((23 * 15) % 32)) << (32 - ((23 * 15) % 32))));
        }
      case 24:
        *out++ = (packed >> ((24 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((25 * 15) % 32)) << (32 - ((25 * 15) % 32))));
        }
      case 26:
        *out++ = (packed >> ((26 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 27:
        {
          uint32_t low, high;
          low = packed >> ((27 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((27 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((27 * 15) % 32)) << (32 - ((27 * 15) % 32))));
        }
      case 28:
        *out++ = (packed >> ((28 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 15) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 15) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((29 * 15) % 32)) << (32 - ((29 * 15) % 32))));
        }
      case 30:
        *out++ = (packed >> ((30 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      case 31:
        *out++ = (packed >> ((31 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* Remaining items start a fresh group: load its first word. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer than a full group left; stop as soon as `end` is reached. */
  end = out + count;
  switch (offset)
  {
  case 0:
    *out++ = (packed >> ((0 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 1:
    *out++ = (packed >> ((1 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((2 * 15) % 32)) << (32 - ((2 * 15) % 32))));
    }
    if (out == end) break;
  case 3:
    *out++ = (packed >> ((3 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((4 * 15) % 32)) << (32 - ((4 * 15) % 32))));
    }
    if (out == end) break;
  case 5:
    *out++ = (packed >> ((5 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((6 * 15) % 32)) << (32 - ((6 * 15) % 32))));
    }
    if (out == end) break;
  case 7:
    *out++ = (packed >> ((7 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((8 * 15) % 32)) << (32 - ((8 * 15) % 32))));
    }
    if (out == end) break;
  case 9:
    *out++ = (packed >> ((9 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((10 * 15) % 32)) << (32 - ((10 * 15) % 32))));
    }
    if (out == end) break;
  case 11:
    *out++ = (packed >> ((11 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((12 * 15) % 32)) << (32 - ((12 * 15) % 32))));
    }
    if (out == end) break;
  case 13:
    *out++ = (packed >> ((13 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((14 * 15) % 32)) << (32 - ((14 * 15) % 32))));
    }
    if (out == end) break;
  case 15:
    *out++ = (packed >> ((15 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 16:
    *out++ = (packed >> ((16 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((17 * 15) % 32)) << (32 - ((17 * 15) % 32))));
    }
    if (out == end) break;
  case 18:
    *out++ = (packed >> ((18 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((19 * 15) % 32)) << (32 - ((19 * 15) % 32))));
    }
    if (out == end) break;
  case 20:
    *out++ = (packed >> ((20 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((21 * 15) % 32)) << (32 - ((21 * 15) % 32))));
    }
    if (out == end) break;
  case 22:
    *out++ = (packed >> ((22 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((23 * 15) % 32)) << (32 - ((23 * 15) % 32))));
    }
    if (out == end) break;
  case 24:
    *out++ = (packed >> ((24 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((25 * 15) % 32)) << (32 - ((25 * 15) % 32))));
    }
    if (out == end) break;
  case 26:
    *out++ = (packed >> ((26 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((27 * 15) % 32)) << (32 - ((27 * 15) % 32))));
    }
    if (out == end) break;
  case 28:
    *out++ = (packed >> ((28 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 15) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 15) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 15) - 1) >> (32 - ((29 * 15) % 32)) << (32 - ((29 * 15) % 32))));
    }
    if (out == end) break;
  case 30:
    *out++ = (packed >> ((30 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  case 31:
    *out++ = (packed >> ((31 * 15) % 32)) & (uint32_t)((1ULL << 15) - 1);
    if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` 16-bit items from `in` into the 32-bit word array `out`,
 * starting at item index `offset` (two items per word, low half first).
 *
 * out    : destination word array; item k lives in word k / 2, in the low
 *          half when k is even and in the high half when k is odd.
 * offset : index of the first item to write.
 * in     : source values, one per uint32_t. Values are not masked, so the
 *          caller must keep them within 16 bits.
 * count  : number of items to write. Zero is a no-op: nothing is read or
 *          written.
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved.
 */
void __PackedArray_pack_16(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  if (count == 0)
    return;

  out += offset / 2;

  /* An odd start index lands in the high half of a word: keep its low half. */
  if (offset % 2 != 0)
  {
    *out = (*out & 0x0000FFFFu) | (*in++ << 16);
    ++out;
    --count;
  }

  /* Whole words: two consecutive items each. */
  for (; count >= 2; count -= 2, in += 2)
    *out++ = in[0] | (in[1] << 16);

  /* A trailing single item fills only the low half: keep the high half. */
  if (count != 0)
    *out = (*out & 0xFFFF0000u) | *in;
}
/*
 * Unpacks `count` 16-bit items from the 32-bit word array `in`, starting at
 * item index `offset`, into `out` (one item per uint32_t).
 *
 * in     : packed word array; item k lives in word k / 2, in the low half
 *          when k is even and in the high half when k is odd.
 * offset : index of the first item to read.
 * out    : destination, receives `count` values in the range 0..0xFFFF.
 * count  : number of items to read. Zero is a no-op: nothing is read or
 *          written.
 *
 * Only the words that actually hold requested items are read.
 */
void __PackedArray_unpack_16(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  if (count == 0)
    return;

  in += offset / 2;

  /* An odd start index lives in the high half of its word. */
  if (offset % 2 != 0)
  {
    *out++ = *in++ >> 16;
    --count;
  }

  /* Whole words: two consecutive items each. */
  for (; count >= 2; count -= 2)
  {
    const uint32_t word = *in++;

    *out++ = word & 0xFFFFu;
    *out++ = word >> 16;
  }

  /* A trailing single item sits in the low half of the next word. */
  if (count != 0)
    *out = *in & 0xFFFFu;
}
/*
 * Packs `count` 17-bit items from `in` into the 32-bit word array `out`,
 * starting at item index `offset`. Item k occupies bits [17 * k, 17 * k + 17)
 * of the bit stream formed by the words of `out`, least significant bit
 * first, so items may straddle two consecutive words.
 *
 * out    : destination word array.
 * offset : index of the first item to write.
 * in     : source values, one per uint32_t. Values are not masked, so the
 *          caller must keep them within 17 bits.
 * count  : number of items to write. Zero is a no-op: nothing is read or
 *          written.
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved.
 */
void __PackedArray_pack_17(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bits = 17u;
  uint32_t shift;  /* bit position of the next item inside the current word */
  uint32_t packed; /* word being assembled */

  if (count == 0)
    return;

  /* 64-bit product so that large offsets do not wrap. */
  out += ((uint64_t)offset * (uint64_t)bits) / 32;
  shift = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % 32);

  /* Keep the bits that precede the first written item. */
  packed = *out & (((uint32_t)1 << shift) - 1u);

  for (; count != 0; --count)
  {
    const uint32_t value = *in++;

    packed |= value << shift;
    shift += bits;

    if (shift >= 32u)
    {
      /* The word is complete: flush it, then carry over the part of the
         item that did not fit (nothing when it ended on the boundary). */
      *out++ = packed;
      shift -= 32u;
      packed = (shift != 0) ? value >> (bits - shift) : 0u;
    }
  }

  /* Partially filled last word: keep the bits that follow the last item. */
  if (shift != 0)
    *out = packed | (*out & ~(((uint32_t)1 << shift) - 1u));
}
/*
 * Unpacks `count` 17-bit items from the 32-bit word array `in`, starting at
 * item index `offset`, into `out` (one item per uint32_t). Item k occupies
 * bits [17 * k, 17 * k + 17) of the bit stream formed by the words of `in`,
 * least significant bit first, so items may straddle two consecutive words.
 *
 * in     : packed word array.
 * offset : index of the first item to read.
 * out    : destination, receives `count` values in the range 0..0x1FFFF.
 * count  : number of items to read. Zero is a no-op: nothing is read or
 *          written.
 *
 * Only the words that actually hold requested items are read.
 */
void __PackedArray_unpack_17(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t bits = 17u;
  const uint32_t mask = ((uint32_t)1 << 17) - 1u;
  uint32_t shift;  /* bit position of the next item inside the current word */
  uint32_t packed; /* word currently being consumed */

  if (count == 0)
    return;

  /* 64-bit product so that large offsets do not wrap. */
  in += ((uint64_t)offset * (uint64_t)bits) / 32;
  shift = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % 32);
  packed = *in;

  for (;;)
  {
    uint32_t value = packed >> shift;

    shift += bits;
    if (shift > 32u)
    {
      /* The item straddles two words: its high part comes from the next. */
      shift -= 32u;
      packed = *++in;
      value |= packed << (bits - shift);
    }
    *out++ = value & mask;

    if (--count == 0)
      break;

    /* The item ended exactly on a word boundary. The next word is fetched
       only now that another item is known to follow, which avoids reading
       past the last word that is needed. */
    if (shift == 32u)
    {
      shift = 0;
      packed = *++in;
    }
  }
}
/*
 * Packs `count` values from `in` into the packed buffer `out`, using 18 bits
 * per item, starting at item index `offset`.
 *
 * Layout: item i occupies bits [i * 18, i * 18 + 18) of the buffer seen as a
 * bit stream in which stream bit b is bit (b % 32) of word (b / 32).
 *
 * out    - base of the packed buffer (32-bit words).
 * offset - index of the first item to write.
 * in     - values to pack; they are OR-ed in without masking, so any bit set
 *          above bit 17 of an input value leaks into the following item.
 * count  - number of items to pack; 0 is a no-op.
 *
 * Bits of the first and last touched words that lie outside the written range
 * are preserved.
 */
void __PackedArray_pack_18(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Empty range: without this guard the tail switch below would never see
     `in == end` before its first read and would run through every case,
     reading past `in` and overwriting packed words. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)18) / 32;
  startBit = ((uint64_t)offset * (uint64_t)18) % 32;
  /* keep the bits of the first word that belong to earlier items */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of at least one 32-item group: the switch
       jumps into the middle of the unrolled do/while (Duff's device style) so
       the first pass starts at item `offset` of its group. All case
       fallthroughs are intentional. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 18) % 32);
        case 1:
          packed |= *in << ((1 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 18) % 32));
        case 2:
          packed |= *in++ << ((2 * 18) % 32);
        case 3:
          packed |= *in << ((3 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((3 * 18) % 32));
        case 4:
          packed |= *in++ << ((4 * 18) % 32);
        case 5:
          packed |= *in << ((5 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((5 * 18) % 32));
        case 6:
          packed |= *in++ << ((6 * 18) % 32);
        case 7:
          packed |= *in << ((7 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((7 * 18) % 32));
        case 8:
          packed |= *in << ((8 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 18) % 32));
        case 9:
          packed |= *in++ << ((9 * 18) % 32);
        case 10:
          packed |= *in << ((10 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((10 * 18) % 32));
        case 11:
          packed |= *in++ << ((11 * 18) % 32);
        case 12:
          packed |= *in << ((12 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 18) % 32));
        case 13:
          packed |= *in++ << ((13 * 18) % 32);
        case 14:
          packed |= *in << ((14 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((14 * 18) % 32));
        case 15:
          /* item ends exactly on a word boundary */
          packed |= *in++ << ((15 * 18) % 32);
          *out++ = packed;
          packed = 0;
        case 16:
          packed |= *in++ << ((16 * 18) % 32);
        case 17:
          packed |= *in << ((17 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((17 * 18) % 32));
        case 18:
          packed |= *in++ << ((18 * 18) % 32);
        case 19:
          packed |= *in << ((19 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 18) % 32));
        case 20:
          packed |= *in++ << ((20 * 18) % 32);
        case 21:
          packed |= *in << ((21 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((21 * 18) % 32));
        case 22:
          packed |= *in++ << ((22 * 18) % 32);
        case 23:
          packed |= *in << ((23 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 18) % 32));
        case 24:
          packed |= *in << ((24 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((24 * 18) % 32));
        case 25:
          packed |= *in++ << ((25 * 18) % 32);
        case 26:
          packed |= *in << ((26 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((26 * 18) % 32));
        case 27:
          packed |= *in++ << ((27 * 18) % 32);
        case 28:
          packed |= *in << ((28 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((28 * 18) % 32));
        case 29:
          packed |= *in++ << ((29 * 18) % 32);
        case 30:
          packed |= *in << ((30 * 18) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 18) % 32));
        case 31:
          /* item ends exactly on a word boundary */
          packed |= *in++ << ((31 * 18) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* the remaining items start a fresh, word-aligned group */
    offset = 0;
    startBit = 0;
  }
  /* Tail: fewer than a full group left; stop as soon as the input is consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 18) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 18) % 32));
      if (in == end) break;
    case 2:
      packed |= *in++ << ((2 * 18) % 32);
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 18) % 32));
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 18) % 32);
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 18) % 32));
      if (in == end) break;
    case 6:
      packed |= *in++ << ((6 * 18) % 32);
      if (in == end) break;
    case 7:
      packed |= *in << ((7 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((7 * 18) % 32));
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 18) % 32));
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 18) % 32);
      if (in == end) break;
    case 10:
      packed |= *in << ((10 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((10 * 18) % 32));
      if (in == end) break;
    case 11:
      packed |= *in++ << ((11 * 18) % 32);
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 18) % 32));
      if (in == end) break;
    case 13:
      packed |= *in++ << ((13 * 18) % 32);
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 18) % 32));
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 18) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 18) % 32);
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 18) % 32));
      if (in == end) break;
    case 18:
      packed |= *in++ << ((18 * 18) % 32);
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 18) % 32));
      if (in == end) break;
    case 20:
      packed |= *in++ << ((20 * 18) % 32);
      if (in == end) break;
    case 21:
      packed |= *in << ((21 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((21 * 18) % 32));
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 18) % 32);
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 18) % 32));
      if (in == end) break;
    case 24:
      packed |= *in << ((24 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((24 * 18) % 32));
      if (in == end) break;
    case 25:
      packed |= *in++ << ((25 * 18) % 32);
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 18) % 32));
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 18) % 32);
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 18) % 32));
      if (in == end) break;
    case 29:
      packed |= *in++ << ((29 * 18) % 32);
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 18) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 18) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 18) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  if ((count * 18 + startBit) % 32)
  {
    /* The last item ended mid-word: flush the pending bits and keep the
       bits of that word that lie above the written range. */
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)18 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` 18-bit items from the packed buffer `in`, starting at item
 * index `offset`, into `out` (one 32-bit word per item).
 *
 * Layout: item i occupies bits [i * 18, i * 18 + 18) of the buffer seen as a
 * bit stream in which stream bit b is bit (b % 32) of word (b / 32).
 *
 * in     - base of the packed buffer (32-bit words).
 * offset - index of the first item to read.
 * out    - destination; receives exactly `count` values, each masked to 18 bits.
 * count  - number of items to unpack; 0 is a no-op.
 */
void __PackedArray_unpack_18(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Empty range: without this guard the tail switch below would never see
     `out == end` before its first store and would write up to 32 items past
     `out`. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)18) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of at least one 32-item group: the switch
       jumps into the middle of the unrolled do/while (Duff's device style) so
       the first pass starts at item `offset` of its group. All case
       fallthroughs are intentional. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* only reached when looping back: load the first word of the next group */
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 1:
        {
          /* item straddles two words: low bits from the current word,
             high bits from the next one */
          uint32_t low, high;
          low = packed >> ((1 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((1 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((1 * 18) % 32)) << (32 - ((1 * 18) % 32))));
        }
        case 2:
          *out++ = (packed >> ((2 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 3:
        {
          uint32_t low, high;
          low = packed >> ((3 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((3 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((3 * 18) % 32)) << (32 - ((3 * 18) % 32))));
        }
        case 4:
          *out++ = (packed >> ((4 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 5:
        {
          uint32_t low, high;
          low = packed >> ((5 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((5 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((5 * 18) % 32)) << (32 - ((5 * 18) % 32))));
        }
        case 6:
          *out++ = (packed >> ((6 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 7:
        {
          uint32_t low, high;
          low = packed >> ((7 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((7 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((7 * 18) % 32)) << (32 - ((7 * 18) % 32))));
        }
        case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((8 * 18) % 32)) << (32 - ((8 * 18) % 32))));
        }
        case 9:
          *out++ = (packed >> ((9 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 10:
        {
          uint32_t low, high;
          low = packed >> ((10 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((10 * 18) % 32)) << (32 - ((10 * 18) % 32))));
        }
        case 11:
          *out++ = (packed >> ((11 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((12 * 18) % 32)) << (32 - ((12 * 18) % 32))));
        }
        case 13:
          *out++ = (packed >> ((13 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((14 * 18) % 32)) << (32 - ((14 * 18) % 32))));
        }
        case 15:
          /* item ends exactly on a word boundary: advance to the next word */
          *out++ = (packed >> ((15 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
          packed = *++in;
        case 16:
          *out++ = (packed >> ((16 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((17 * 18) % 32)) << (32 - ((17 * 18) % 32))));
        }
        case 18:
          *out++ = (packed >> ((18 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((19 * 18) % 32)) << (32 - ((19 * 18) % 32))));
        }
        case 20:
          *out++ = (packed >> ((20 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((21 * 18) % 32)) << (32 - ((21 * 18) % 32))));
        }
        case 22:
          *out++ = (packed >> ((22 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((23 * 18) % 32)) << (32 - ((23 * 18) % 32))));
        }
        case 24:
        {
          uint32_t low, high;
          low = packed >> ((24 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((24 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((24 * 18) % 32)) << (32 - ((24 * 18) % 32))));
        }
        case 25:
          *out++ = (packed >> ((25 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 26:
        {
          uint32_t low, high;
          low = packed >> ((26 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((26 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((26 * 18) % 32)) << (32 - ((26 * 18) % 32))));
        }
        case 27:
          *out++ = (packed >> ((27 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 28:
        {
          uint32_t low, high;
          low = packed >> ((28 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((28 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((28 * 18) % 32)) << (32 - ((28 * 18) % 32))));
        }
        case 29:
          *out++ = (packed >> ((29 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
        case 30:
        {
          uint32_t low, high;
          low = packed >> ((30 * 18) % 32);
          packed = *++in;
          high = packed << (32 - ((30 * 18) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((30 * 18) % 32)) << (32 - ((30 * 18) % 32))));
        }
        case 31:
          *out++ = (packed >> ((31 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* the remaining items start a fresh, word-aligned group */
    packed = *++in;
    offset = 0;
  }
  /* Tail: fewer than a full group left; stop as soon as `count` items are written. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((1 * 18) % 32)) << (32 - ((1 * 18) % 32))));
    }
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((3 * 18) % 32)) << (32 - ((3 * 18) % 32))));
    }
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((5 * 18) % 32)) << (32 - ((5 * 18) % 32))));
    }
      if (out == end) break;
    case 6:
      *out++ = (packed >> ((6 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 7:
    {
      uint32_t low, high;
      low = packed >> ((7 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((7 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((7 * 18) % 32)) << (32 - ((7 * 18) % 32))));
    }
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((8 * 18) % 32)) << (32 - ((8 * 18) % 32))));
    }
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((10 * 18) % 32)) << (32 - ((10 * 18) % 32))));
    }
      if (out == end) break;
    case 11:
      *out++ = (packed >> ((11 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((12 * 18) % 32)) << (32 - ((12 * 18) % 32))));
    }
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((14 * 18) % 32)) << (32 - ((14 * 18) % 32))));
    }
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      /* check before loading so the word after the last item is never read */
      if (out == end) break;
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((17 * 18) % 32)) << (32 - ((17 * 18) % 32))));
    }
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((19 * 18) % 32)) << (32 - ((19 * 18) % 32))));
    }
      if (out == end) break;
    case 20:
      *out++ = (packed >> ((20 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((21 * 18) % 32)) << (32 - ((21 * 18) % 32))));
    }
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((23 * 18) % 32)) << (32 - ((23 * 18) % 32))));
    }
      if (out == end) break;
    case 24:
    {
      uint32_t low, high;
      low = packed >> ((24 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((24 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((24 * 18) % 32)) << (32 - ((24 * 18) % 32))));
    }
      if (out == end) break;
    case 25:
      *out++ = (packed >> ((25 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((26 * 18) % 32)) << (32 - ((26 * 18) % 32))));
    }
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((28 * 18) % 32)) << (32 - ((28 * 18) % 32))));
    }
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 18) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 18) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 18) - 1) >> (32 - ((30 * 18) % 32)) << (32 - ((30 * 18) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 18) % 32)) & (uint32_t)((1ULL << 18) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` values from `in` into the packed buffer `out`, using 19 bits
 * per item, starting at item index `offset`.
 *
 * Layout: item i occupies bits [i * 19, i * 19 + 19) of the buffer seen as a
 * bit stream in which stream bit b is bit (b % 32) of word (b / 32).
 *
 * out    - base of the packed buffer (32-bit words).
 * offset - index of the first item to write.
 * in     - values to pack; they are OR-ed in without masking, so any bit set
 *          above bit 18 of an input value leaks into the following item.
 * count  - number of items to pack; 0 is a no-op.
 *
 * Bits of the first and last touched words that lie outside the written range
 * are preserved.
 */
void __PackedArray_pack_19(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Empty range: without this guard the tail switch below would never see
     `in == end` before its first read and would run through every case,
     reading past `in` and overwriting packed words. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)19) / 32;
  startBit = ((uint64_t)offset * (uint64_t)19) % 32;
  /* keep the bits of the first word that belong to earlier items */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of at least one 32-item group: the switch
       jumps into the middle of the unrolled do/while (Duff's device style) so
       the first pass starts at item `offset` of its group. All case
       fallthroughs are intentional. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 19) % 32);
        case 1:
          packed |= *in << ((1 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 19) % 32));
        case 2:
          packed |= *in++ << ((2 * 19) % 32);
        case 3:
          packed |= *in << ((3 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((3 * 19) % 32));
        case 4:
          packed |= *in++ << ((4 * 19) % 32);
        case 5:
          packed |= *in << ((5 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((5 * 19) % 32));
        case 6:
          packed |= *in << ((6 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 19) % 32));
        case 7:
          packed |= *in++ << ((7 * 19) % 32);
        case 8:
          packed |= *in << ((8 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 19) % 32));
        case 9:
          packed |= *in++ << ((9 * 19) % 32);
        case 10:
          packed |= *in << ((10 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((10 * 19) % 32));
        case 11:
          packed |= *in << ((11 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((11 * 19) % 32));
        case 12:
          packed |= *in++ << ((12 * 19) % 32);
        case 13:
          packed |= *in << ((13 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((13 * 19) % 32));
        case 14:
          packed |= *in++ << ((14 * 19) % 32);
        case 15:
          packed |= *in << ((15 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((15 * 19) % 32));
        case 16:
          packed |= *in << ((16 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((16 * 19) % 32));
        case 17:
          packed |= *in++ << ((17 * 19) % 32);
        case 18:
          packed |= *in << ((18 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((18 * 19) % 32));
        case 19:
          packed |= *in++ << ((19 * 19) % 32);
        case 20:
          packed |= *in << ((20 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((20 * 19) % 32));
        case 21:
          packed |= *in << ((21 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((21 * 19) % 32));
        case 22:
          packed |= *in++ << ((22 * 19) % 32);
        case 23:
          packed |= *in << ((23 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 19) % 32));
        case 24:
          packed |= *in++ << ((24 * 19) % 32);
        case 25:
          packed |= *in << ((25 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 19) % 32));
        case 26:
          packed |= *in << ((26 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((26 * 19) % 32));
        case 27:
          packed |= *in++ << ((27 * 19) % 32);
        case 28:
          packed |= *in << ((28 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((28 * 19) % 32));
        case 29:
          packed |= *in++ << ((29 * 19) % 32);
        case 30:
          packed |= *in << ((30 * 19) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 19) % 32));
        case 31:
          /* item ends exactly on a word boundary */
          packed |= *in++ << ((31 * 19) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* the remaining items start a fresh, word-aligned group */
    offset = 0;
    startBit = 0;
  }
  /* Tail: fewer than a full group left; stop as soon as the input is consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 19) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 19) % 32));
      if (in == end) break;
    case 2:
      packed |= *in++ << ((2 * 19) % 32);
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 19) % 32));
      if (in == end) break;
    case 4:
      packed |= *in++ << ((4 * 19) % 32);
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 19) % 32));
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 19) % 32));
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 19) % 32);
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 19) % 32));
      if (in == end) break;
    case 9:
      packed |= *in++ << ((9 * 19) % 32);
      if (in == end) break;
    case 10:
      packed |= *in << ((10 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((10 * 19) % 32));
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 19) % 32));
      if (in == end) break;
    case 12:
      packed |= *in++ << ((12 * 19) % 32);
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 19) % 32));
      if (in == end) break;
    case 14:
      packed |= *in++ << ((14 * 19) % 32);
      if (in == end) break;
    case 15:
      packed |= *in << ((15 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((15 * 19) % 32));
      if (in == end) break;
    case 16:
      packed |= *in << ((16 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((16 * 19) % 32));
      if (in == end) break;
    case 17:
      packed |= *in++ << ((17 * 19) % 32);
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 19) % 32));
      if (in == end) break;
    case 19:
      packed |= *in++ << ((19 * 19) % 32);
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 19) % 32));
      if (in == end) break;
    case 21:
      packed |= *in << ((21 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((21 * 19) % 32));
      if (in == end) break;
    case 22:
      packed |= *in++ << ((22 * 19) % 32);
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 19) % 32));
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 19) % 32);
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 19) % 32));
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 19) % 32));
      if (in == end) break;
    case 27:
      packed |= *in++ << ((27 * 19) % 32);
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 19) % 32));
      if (in == end) break;
    case 29:
      packed |= *in++ << ((29 * 19) % 32);
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 19) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 19) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 19) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  if ((count * 19 + startBit) % 32)
  {
    /* The last item ended mid-word: flush the pending bits and keep the
       bits of that word that lie above the written range. */
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)19 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Extracts `count` consecutive 19-bit items from the packed word stream `in`
 * and stores them, one item per element, into `out`.
 *
 * in     - base of the packed 32-bit words; item i occupies the 19 bits that
 *          start at bit (i * 19) of the stream, lowest bit first in each word.
 * offset - index of the first item to extract.
 * out    - destination array, receives `count` values.
 * count  - number of items to extract; zero is a no-op.
 *
 * Structure: 32 items span a whole number of words, so the position of an item
 * inside its group of 32 (offset % 32) fully determines its shift. The first
 * switch jumps into the middle of an unrolled do/while that handles the rest
 * of the current group plus every following complete group; the second switch
 * handles the remaining items and tests for the end after each one. All case
 * labels fall through on purpose.
 */
void __PackedArray_unpack_19(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /*
   * Nothing to extract. Without this guard the tail switch below would
   * post-increment `out` past `end` before the first `out == end` test, so
   * the test could never succeed and every remaining case would write to
   * `out`.
   */
  if (count == 0)
  {
    return;
  }

  /* Move to the word that holds the first requested bit. */
  in += ((uint64_t)offset * (uint64_t)19) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (the first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* A group of 32 items ends on a word boundary: load the next word. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 1:
      {
        /*
         * Item straddles two words: `low` holds the bits left in the current
         * word, `high` the bits taken from the next one; the mask selects from
         * `high` exactly the item bits that `low` cannot provide.
         */
        uint32_t low, high;
        low = packed >> ((1 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((1 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((1 * 19) % 32)) << (32 - ((1 * 19) % 32))));
      }
      case 2:
        *out++ = (packed >> ((2 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 3:
      {
        uint32_t low, high;
        low = packed >> ((3 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((3 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((3 * 19) % 32)) << (32 - ((3 * 19) % 32))));
      }
      case 4:
        *out++ = (packed >> ((4 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 5:
      {
        uint32_t low, high;
        low = packed >> ((5 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((5 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((5 * 19) % 32)) << (32 - ((5 * 19) % 32))));
      }
      case 6:
      {
        uint32_t low, high;
        low = packed >> ((6 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((6 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((6 * 19) % 32)) << (32 - ((6 * 19) % 32))));
      }
      case 7:
        *out++ = (packed >> ((7 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 8:
      {
        uint32_t low, high;
        low = packed >> ((8 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((8 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((8 * 19) % 32)) << (32 - ((8 * 19) % 32))));
      }
      case 9:
        *out++ = (packed >> ((9 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 10:
      {
        uint32_t low, high;
        low = packed >> ((10 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((10 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((10 * 19) % 32)) << (32 - ((10 * 19) % 32))));
      }
      case 11:
      {
        uint32_t low, high;
        low = packed >> ((11 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((11 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((11 * 19) % 32)) << (32 - ((11 * 19) % 32))));
      }
      case 12:
        *out++ = (packed >> ((12 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 13:
      {
        uint32_t low, high;
        low = packed >> ((13 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((13 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((13 * 19) % 32)) << (32 - ((13 * 19) % 32))));
      }
      case 14:
        *out++ = (packed >> ((14 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 15:
      {
        uint32_t low, high;
        low = packed >> ((15 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((15 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((15 * 19) % 32)) << (32 - ((15 * 19) % 32))));
      }
      case 16:
      {
        uint32_t low, high;
        low = packed >> ((16 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((16 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((16 * 19) % 32)) << (32 - ((16 * 19) % 32))));
      }
      case 17:
        *out++ = (packed >> ((17 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 18:
      {
        uint32_t low, high;
        low = packed >> ((18 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((18 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((18 * 19) % 32)) << (32 - ((18 * 19) % 32))));
      }
      case 19:
        *out++ = (packed >> ((19 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 20:
      {
        uint32_t low, high;
        low = packed >> ((20 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((20 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((20 * 19) % 32)) << (32 - ((20 * 19) % 32))));
      }
      case 21:
      {
        uint32_t low, high;
        low = packed >> ((21 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((21 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((21 * 19) % 32)) << (32 - ((21 * 19) % 32))));
      }
      case 22:
        *out++ = (packed >> ((22 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 23:
      {
        uint32_t low, high;
        low = packed >> ((23 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((23 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((23 * 19) % 32)) << (32 - ((23 * 19) % 32))));
      }
      case 24:
        *out++ = (packed >> ((24 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 25:
      {
        uint32_t low, high;
        low = packed >> ((25 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((25 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((25 * 19) % 32)) << (32 - ((25 * 19) % 32))));
      }
      case 26:
      {
        uint32_t low, high;
        low = packed >> ((26 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((26 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((26 * 19) % 32)) << (32 - ((26 * 19) % 32))));
      }
      case 27:
        *out++ = (packed >> ((27 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 28:
      {
        uint32_t low, high;
        low = packed >> ((28 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((28 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((28 * 19) % 32)) << (32 - ((28 * 19) % 32))));
      }
      case 29:
        *out++ = (packed >> ((29 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      case 30:
      {
        uint32_t low, high;
        low = packed >> ((30 * 19) % 32);
        packed = *++in;
        high = packed << (32 - ((30 * 19) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((30 * 19) % 32)) << (32 - ((30 * 19) % 32))));
      }
      case 31:
        *out++ = (packed >> ((31 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      } while (--n > 0);
    }
    /* The request ended exactly on a group boundary: no tail to process. */
    if (count == 0)
    {
      return;
    }
    /* The tail starts a fresh group, i.e. at bit 0 of the next word. */
    packed = *++in;
    offset = 0;
  }
  /* Tail: fewer than a full group remains; stop as soon as `end` is reached. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((1 * 19) % 32)) << (32 - ((1 * 19) % 32))));
    }
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((3 * 19) % 32)) << (32 - ((3 * 19) % 32))));
    }
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((5 * 19) % 32)) << (32 - ((5 * 19) % 32))));
    }
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((6 * 19) % 32)) << (32 - ((6 * 19) % 32))));
    }
      if (out == end) break;
    case 7:
      *out++ = (packed >> ((7 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((8 * 19) % 32)) << (32 - ((8 * 19) % 32))));
    }
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((10 * 19) % 32)) << (32 - ((10 * 19) % 32))));
    }
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((11 * 19) % 32)) << (32 - ((11 * 19) % 32))));
    }
      if (out == end) break;
    case 12:
      *out++ = (packed >> ((12 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 13:
    {
      uint32_t low, high;
      low = packed >> ((13 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((13 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((13 * 19) % 32)) << (32 - ((13 * 19) % 32))));
    }
      if (out == end) break;
    case 14:
      *out++ = (packed >> ((14 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 15:
    {
      uint32_t low, high;
      low = packed >> ((15 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((15 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((15 * 19) % 32)) << (32 - ((15 * 19) % 32))));
    }
      if (out == end) break;
    case 16:
    {
      uint32_t low, high;
      low = packed >> ((16 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((16 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((16 * 19) % 32)) << (32 - ((16 * 19) % 32))));
    }
      if (out == end) break;
    case 17:
      *out++ = (packed >> ((17 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 18:
    {
      uint32_t low, high;
      low = packed >> ((18 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((18 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((18 * 19) % 32)) << (32 - ((18 * 19) % 32))));
    }
      if (out == end) break;
    case 19:
      *out++ = (packed >> ((19 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((20 * 19) % 32)) << (32 - ((20 * 19) % 32))));
    }
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((21 * 19) % 32)) << (32 - ((21 * 19) % 32))));
    }
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((23 * 19) % 32)) << (32 - ((23 * 19) % 32))));
    }
      if (out == end) break;
    case 24:
      *out++ = (packed >> ((24 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((25 * 19) % 32)) << (32 - ((25 * 19) % 32))));
    }
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((26 * 19) % 32)) << (32 - ((26 * 19) % 32))));
    }
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((28 * 19) % 32)) << (32 - ((28 * 19) % 32))));
    }
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 19) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 19) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 19) - 1) >> (32 - ((30 * 19) % 32)) << (32 - ((30 * 19) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 19) % 32)) & (uint32_t)((1ULL << 19) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Stores `count` values from `in` as consecutive 20-bit items in the packed
 * word stream `out`, starting at item index `offset`.
 *
 * out    - base of the packed 32-bit words; item i occupies the 20 bits that
 *          start at bit (i * 20) of the stream, lowest bit first in each word.
 *          Bits of the first and last touched words that lie outside the
 *          written range are preserved.
 * offset - index of the first item to write.
 * in     - source values. They are OR-ed into the stream without masking, so
 *          a value wider than 20 bits spills into neighbouring items.
 * count  - number of items to write; zero is a no-op.
 *
 * Structure: 32 items span a whole number of words, so the position of an item
 * inside its group of 32 (offset % 32) fully determines its shift. The first
 * switch jumps into the middle of an unrolled do/while that handles the rest
 * of the current group plus every following complete group; the second switch
 * handles the remaining items and tests for the end after each one. All case
 * labels fall through on purpose.
 */
void __PackedArray_pack_20(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /*
   * Nothing to store. Without this guard the tail switch below would
   * post-increment `in` past `end` before the first `in == end` test, so the
   * test could never succeed and every remaining case would read `in` and
   * rewrite words of `out`.
   */
  if (count == 0)
  {
    return;
  }

  /* Move to the word that holds the first target bit; keep the bits below it. */
  out += ((uint64_t)offset * (uint64_t)20) / 32;
  startBit = ((uint64_t)offset * (uint64_t)20) % 32;
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (the first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
      case 0:
        packed |= *in++ << ((0 * 20) % 32);
      case 1:
        /* Item straddles two words: flush the low part, carry the high part. */
        packed |= *in << ((1 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((1 * 20) % 32));
      case 2:
        packed |= *in++ << ((2 * 20) % 32);
      case 3:
        packed |= *in << ((3 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((3 * 20) % 32));
      case 4:
        packed |= *in << ((4 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((4 * 20) % 32));
      case 5:
        packed |= *in++ << ((5 * 20) % 32);
      case 6:
        packed |= *in << ((6 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((6 * 20) % 32));
      case 7:
        /* Item ends exactly on a word boundary: flush and start from zero. */
        packed |= *in++ << ((7 * 20) % 32);
        *out++ = packed;
        packed = 0;
      case 8:
        packed |= *in++ << ((8 * 20) % 32);
      case 9:
        packed |= *in << ((9 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((9 * 20) % 32));
      case 10:
        packed |= *in++ << ((10 * 20) % 32);
      case 11:
        packed |= *in << ((11 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((11 * 20) % 32));
      case 12:
        packed |= *in << ((12 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((12 * 20) % 32));
      case 13:
        packed |= *in++ << ((13 * 20) % 32);
      case 14:
        packed |= *in << ((14 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((14 * 20) % 32));
      case 15:
        packed |= *in++ << ((15 * 20) % 32);
        *out++ = packed;
        packed = 0;
      case 16:
        packed |= *in++ << ((16 * 20) % 32);
      case 17:
        packed |= *in << ((17 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((17 * 20) % 32));
      case 18:
        packed |= *in++ << ((18 * 20) % 32);
      case 19:
        packed |= *in << ((19 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((19 * 20) % 32));
      case 20:
        packed |= *in << ((20 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((20 * 20) % 32));
      case 21:
        packed |= *in++ << ((21 * 20) % 32);
      case 22:
        packed |= *in << ((22 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((22 * 20) % 32));
      case 23:
        packed |= *in++ << ((23 * 20) % 32);
        *out++ = packed;
        packed = 0;
      case 24:
        packed |= *in++ << ((24 * 20) % 32);
      case 25:
        packed |= *in << ((25 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((25 * 20) % 32));
      case 26:
        packed |= *in++ << ((26 * 20) % 32);
      case 27:
        packed |= *in << ((27 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((27 * 20) % 32));
      case 28:
        packed |= *in << ((28 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((28 * 20) % 32));
      case 29:
        packed |= *in++ << ((29 * 20) % 32);
      case 30:
        packed |= *in << ((30 * 20) % 32);
        *out++ = packed;
        packed = *in++ >> (32 - ((30 * 20) % 32));
      case 31:
        packed |= *in++ << ((31 * 20) % 32);
        *out++ = packed;
        packed = 0;
      } while (--n > 0);
    }
    /* The request ended exactly on a group boundary: no partial word left. */
    if (count == 0)
    {
      return;
    }
    /* The tail starts a fresh group, i.e. at bit 0 of the current word. */
    offset = 0;
    startBit = 0;
  }
  /* Tail: fewer than a full group remains; stop as soon as `end` is reached. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 20) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 20) % 32));
      if (in == end) break;
    case 2:
      packed |= *in++ << ((2 * 20) % 32);
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 20) % 32));
      if (in == end) break;
    case 4:
      packed |= *in << ((4 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((4 * 20) % 32));
      if (in == end) break;
    case 5:
      packed |= *in++ << ((5 * 20) % 32);
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 20) % 32));
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 20) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 8:
      packed |= *in++ << ((8 * 20) % 32);
      if (in == end) break;
    case 9:
      packed |= *in << ((9 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((9 * 20) % 32));
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 20) % 32);
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 20) % 32));
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 20) % 32));
      if (in == end) break;
    case 13:
      packed |= *in++ << ((13 * 20) % 32);
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 20) % 32));
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 20) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 20) % 32);
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 20) % 32));
      if (in == end) break;
    case 18:
      packed |= *in++ << ((18 * 20) % 32);
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 20) % 32));
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 20) % 32));
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 20) % 32);
      if (in == end) break;
    case 22:
      packed |= *in << ((22 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((22 * 20) % 32));
      if (in == end) break;
    case 23:
      packed |= *in++ << ((23 * 20) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 20) % 32);
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 20) % 32));
      if (in == end) break;
    case 26:
      packed |= *in++ << ((26 * 20) % 32);
      if (in == end) break;
    case 27:
      packed |= *in << ((27 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((27 * 20) % 32));
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 20) % 32));
      if (in == end) break;
    case 29:
      packed |= *in++ << ((29 * 20) % 32);
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 20) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 20) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 20) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  /*
   * If the written range stops in the middle of a word, merge the pending low
   * bits with the bits already stored above the range and write the word back.
   */
  if ((count * 20 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)20 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Extracts `count` consecutive 20-bit items from the packed word stream `in`
 * and stores them, one item per element, into `out`.
 *
 * in     - base of the packed 32-bit words; item i occupies the 20 bits that
 *          start at bit (i * 20) of the stream, lowest bit first in each word.
 * offset - index of the first item to extract.
 * out    - destination array, receives `count` values.
 * count  - number of items to extract; zero is a no-op.
 *
 * Structure: 32 items span a whole number of words, so the position of an item
 * inside its group of 32 (offset % 32) fully determines its shift. The first
 * switch jumps into the middle of an unrolled do/while that handles the rest
 * of the current group plus every following complete group; the second switch
 * handles the remaining items and tests for the end after each one. All case
 * labels fall through on purpose.
 */
void __PackedArray_unpack_20(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /*
   * Nothing to extract. Without this guard the tail switch below would
   * post-increment `out` past `end` before the first `out == end` test, so
   * the test could never succeed and every remaining case would write to
   * `out`.
   */
  if (count == 0)
  {
    return;
  }

  /* Move to the word that holds the first requested bit. */
  in += ((uint64_t)offset * (uint64_t)20) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (the first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* A group of 32 items ends on a word boundary: load the next word. */
        packed = *++in;
      case 0:
        *out++ = (packed >> ((0 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 1:
      {
        /*
         * Item straddles two words: `low` holds the bits left in the current
         * word, `high` the bits taken from the next one; the mask selects from
         * `high` exactly the item bits that `low` cannot provide.
         */
        uint32_t low, high;
        low = packed >> ((1 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((1 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((1 * 20) % 32)) << (32 - ((1 * 20) % 32))));
      }
      case 2:
        *out++ = (packed >> ((2 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 3:
      {
        uint32_t low, high;
        low = packed >> ((3 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((3 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((3 * 20) % 32)) << (32 - ((3 * 20) % 32))));
      }
      case 4:
      {
        uint32_t low, high;
        low = packed >> ((4 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((4 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((4 * 20) % 32)) << (32 - ((4 * 20) % 32))));
      }
      case 5:
        *out++ = (packed >> ((5 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 6:
      {
        uint32_t low, high;
        low = packed >> ((6 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((6 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((6 * 20) % 32)) << (32 - ((6 * 20) % 32))));
      }
      case 7:
        /* Item ends exactly on a word boundary: the next item needs a new word. */
        *out++ = (packed >> ((7 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
        packed = *++in;
      case 8:
        *out++ = (packed >> ((8 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 9:
      {
        uint32_t low, high;
        low = packed >> ((9 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((9 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((9 * 20) % 32)) << (32 - ((9 * 20) % 32))));
      }
      case 10:
        *out++ = (packed >> ((10 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 11:
      {
        uint32_t low, high;
        low = packed >> ((11 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((11 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((11 * 20) % 32)) << (32 - ((11 * 20) % 32))));
      }
      case 12:
      {
        uint32_t low, high;
        low = packed >> ((12 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((12 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((12 * 20) % 32)) << (32 - ((12 * 20) % 32))));
      }
      case 13:
        *out++ = (packed >> ((13 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 14:
      {
        uint32_t low, high;
        low = packed >> ((14 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((14 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((14 * 20) % 32)) << (32 - ((14 * 20) % 32))));
      }
      case 15:
        *out++ = (packed >> ((15 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
        packed = *++in;
      case 16:
        *out++ = (packed >> ((16 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 17:
      {
        uint32_t low, high;
        low = packed >> ((17 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((17 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((17 * 20) % 32)) << (32 - ((17 * 20) % 32))));
      }
      case 18:
        *out++ = (packed >> ((18 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 19:
      {
        uint32_t low, high;
        low = packed >> ((19 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((19 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((19 * 20) % 32)) << (32 - ((19 * 20) % 32))));
      }
      case 20:
      {
        uint32_t low, high;
        low = packed >> ((20 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((20 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((20 * 20) % 32)) << (32 - ((20 * 20) % 32))));
      }
      case 21:
        *out++ = (packed >> ((21 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 22:
      {
        uint32_t low, high;
        low = packed >> ((22 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((22 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((22 * 20) % 32)) << (32 - ((22 * 20) % 32))));
      }
      case 23:
        *out++ = (packed >> ((23 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
        packed = *++in;
      case 24:
        *out++ = (packed >> ((24 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 25:
      {
        uint32_t low, high;
        low = packed >> ((25 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((25 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((25 * 20) % 32)) << (32 - ((25 * 20) % 32))));
      }
      case 26:
        *out++ = (packed >> ((26 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 27:
      {
        uint32_t low, high;
        low = packed >> ((27 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((27 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((27 * 20) % 32)) << (32 - ((27 * 20) % 32))));
      }
      case 28:
      {
        uint32_t low, high;
        low = packed >> ((28 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((28 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((28 * 20) % 32)) << (32 - ((28 * 20) % 32))));
      }
      case 29:
        *out++ = (packed >> ((29 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      case 30:
      {
        uint32_t low, high;
        low = packed >> ((30 * 20) % 32);
        packed = *++in;
        high = packed << (32 - ((30 * 20) % 32));
        *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((30 * 20) % 32)) << (32 - ((30 * 20) % 32))));
      }
      case 31:
        *out++ = (packed >> ((31 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      } while (--n > 0);
    }
    /* The request ended exactly on a group boundary: no tail to process. */
    if (count == 0)
    {
      return;
    }
    /* The tail starts a fresh group, i.e. at bit 0 of the next word. */
    packed = *++in;
    offset = 0;
  }
  /* Tail: fewer than a full group remains; stop as soon as `end` is reached. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((1 * 20) % 32)) << (32 - ((1 * 20) % 32))));
    }
      if (out == end) break;
    case 2:
      *out++ = (packed >> ((2 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((3 * 20) % 32)) << (32 - ((3 * 20) % 32))));
    }
      if (out == end) break;
    case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((4 * 20) % 32)) << (32 - ((4 * 20) % 32))));
    }
      if (out == end) break;
    case 5:
      *out++ = (packed >> ((5 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((6 * 20) % 32)) << (32 - ((6 * 20) % 32))));
    }
      if (out == end) break;
    case 7:
      /* End test comes first so the next word is only read when still needed. */
      *out++ = (packed >> ((7 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
      packed = *++in;
    case 8:
      *out++ = (packed >> ((8 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 9:
    {
      uint32_t low, high;
      low = packed >> ((9 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((9 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((9 * 20) % 32)) << (32 - ((9 * 20) % 32))));
    }
      if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((11 * 20) % 32)) << (32 - ((11 * 20) % 32))));
    }
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((12 * 20) % 32)) << (32 - ((12 * 20) % 32))));
    }
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((14 * 20) % 32)) << (32 - ((14 * 20) % 32))));
    }
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((17 * 20) % 32)) << (32 - ((17 * 20) % 32))));
    }
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((19 * 20) % 32)) << (32 - ((19 * 20) % 32))));
    }
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((20 * 20) % 32)) << (32 - ((20 * 20) % 32))));
    }
      if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 22:
    {
      uint32_t low, high;
      low = packed >> ((22 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((22 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((22 * 20) % 32)) << (32 - ((22 * 20) % 32))));
    }
      if (out == end) break;
    case 23:
      *out++ = (packed >> ((23 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
      packed = *++in;
    case 24:
      *out++ = (packed >> ((24 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((25 * 20) % 32)) << (32 - ((25 * 20) % 32))));
    }
      if (out == end) break;
    case 26:
      *out++ = (packed >> ((26 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((27 * 20) % 32)) << (32 - ((27 * 20) % 32))));
    }
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((28 * 20) % 32)) << (32 - ((28 * 20) % 32))));
    }
      if (out == end) break;
    case 29:
      *out++ = (packed >> ((29 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 20) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 20) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 20) - 1) >> (32 - ((30 * 20) % 32)) << (32 - ((30 * 20) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 20) % 32)) & (uint32_t)((1ULL << 20) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * __PackedArray_pack_21 - write `count` 21-bit items into a packed stream.
 *
 *   out    - base of the packed buffer, addressed as 32-bit words. Item i
 *            occupies stream bits [i * 21, i * 21 + 21), lowest bit first.
 *   offset - index of the first item to write.
 *   in     - `count` source values, one per uint32_t. Values are NOT masked:
 *            any bit set above bit 20 is OR-ed into neighbouring items.
 *   count  - number of items to write. Zero is a no-op (the unguarded code
 *            would run past both buffers because the `in == end` test is
 *            only evaluated after `in` has already been advanced).
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved; words strictly in between are overwritten.
 *
 * The body is a Duff's device: `switch (offset)` jumps into the middle of a
 * 32-step unrolled loop so the first, possibly partial, group of 32 items
 * and all following full groups share the same code. Whatever is left
 * (fewer than 32 items) goes through a second switch that checks for the
 * end of input after every item. Every case falls through on purpose.
 */

/* Bit position, inside its 32-bit word, at which item k of a group starts. */
#define PA_PACK21_SHIFT(k) (((k) * 21) % 32)

/*
 * Emit item k of a group. All conditions are integer constant expressions,
 * so each expansion folds to exactly one of three shapes:
 *   - item fits with room to spare: OR it into the pending word;
 *   - item ends exactly on the word boundary: OR, flush, restart from 0;
 *   - item straddles two words: OR the low part, flush, keep the high part.
 * The `& 31` only keeps the (dead) shift well-defined when SHIFT(k) is 0.
 */
#define PA_PACK21_STEP(k)                                                    \
  do                                                                         \
  {                                                                          \
    const uint32_t item_ = *in++;                                            \
    packed |= item_ << PA_PACK21_SHIFT(k);                                   \
    if (PA_PACK21_SHIFT(k) + 21 >= 32)                                       \
    {                                                                        \
      *out++ = packed;                                                       \
      packed = (PA_PACK21_SHIFT(k) + 21 > 32)                                \
        ? item_ >> ((32 - PA_PACK21_SHIFT(k)) & 31)                          \
        : 0;                                                                 \
    }                                                                        \
  } while (0)

void __PackedArray_pack_21(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to write: do not even read *out, it may be past the buffer. */
  if (count == 0)
    return;

  /* Locate the first destination word and keep the bits below startBit. */
  out += ((uint64_t)offset * (uint64_t)21) / 32;
  startBit = ((uint64_t)offset * (uint64_t)21) % 32;
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0: PA_PACK21_STEP(0);
        case 1: PA_PACK21_STEP(1);
        case 2: PA_PACK21_STEP(2);
        case 3: PA_PACK21_STEP(3);
        case 4: PA_PACK21_STEP(4);
        case 5: PA_PACK21_STEP(5);
        case 6: PA_PACK21_STEP(6);
        case 7: PA_PACK21_STEP(7);
        case 8: PA_PACK21_STEP(8);
        case 9: PA_PACK21_STEP(9);
        case 10: PA_PACK21_STEP(10);
        case 11: PA_PACK21_STEP(11);
        case 12: PA_PACK21_STEP(12);
        case 13: PA_PACK21_STEP(13);
        case 14: PA_PACK21_STEP(14);
        case 15: PA_PACK21_STEP(15);
        case 16: PA_PACK21_STEP(16);
        case 17: PA_PACK21_STEP(17);
        case 18: PA_PACK21_STEP(18);
        case 19: PA_PACK21_STEP(19);
        case 20: PA_PACK21_STEP(20);
        case 21: PA_PACK21_STEP(21);
        case 22: PA_PACK21_STEP(22);
        case 23: PA_PACK21_STEP(23);
        case 24: PA_PACK21_STEP(24);
        case 25: PA_PACK21_STEP(25);
        case 26: PA_PACK21_STEP(26);
        case 27: PA_PACK21_STEP(27);
        case 28: PA_PACK21_STEP(28);
        case 29: PA_PACK21_STEP(29);
        case 30: PA_PACK21_STEP(30);
        case 31: PA_PACK21_STEP(31);
      } while (--n > 0);
    }
    /* Ended exactly on a word boundary: no partial word to merge. */
    if (count == 0)
      return;
    /* The remainder starts a fresh group on a fresh word. */
    offset = 0;
    startBit = 0;
  }

  /* Remaining items (fewer than a full group): stop as soon as in hits end. */
  end = in + count;
  switch (offset)
  {
    case 0: PA_PACK21_STEP(0); if (in == end) break;
    case 1: PA_PACK21_STEP(1); if (in == end) break;
    case 2: PA_PACK21_STEP(2); if (in == end) break;
    case 3: PA_PACK21_STEP(3); if (in == end) break;
    case 4: PA_PACK21_STEP(4); if (in == end) break;
    case 5: PA_PACK21_STEP(5); if (in == end) break;
    case 6: PA_PACK21_STEP(6); if (in == end) break;
    case 7: PA_PACK21_STEP(7); if (in == end) break;
    case 8: PA_PACK21_STEP(8); if (in == end) break;
    case 9: PA_PACK21_STEP(9); if (in == end) break;
    case 10: PA_PACK21_STEP(10); if (in == end) break;
    case 11: PA_PACK21_STEP(11); if (in == end) break;
    case 12: PA_PACK21_STEP(12); if (in == end) break;
    case 13: PA_PACK21_STEP(13); if (in == end) break;
    case 14: PA_PACK21_STEP(14); if (in == end) break;
    case 15: PA_PACK21_STEP(15); if (in == end) break;
    case 16: PA_PACK21_STEP(16); if (in == end) break;
    case 17: PA_PACK21_STEP(17); if (in == end) break;
    case 18: PA_PACK21_STEP(18); if (in == end) break;
    case 19: PA_PACK21_STEP(19); if (in == end) break;
    case 20: PA_PACK21_STEP(20); if (in == end) break;
    case 21: PA_PACK21_STEP(21); if (in == end) break;
    case 22: PA_PACK21_STEP(22); if (in == end) break;
    case 23: PA_PACK21_STEP(23); if (in == end) break;
    case 24: PA_PACK21_STEP(24); if (in == end) break;
    case 25: PA_PACK21_STEP(25); if (in == end) break;
    case 26: PA_PACK21_STEP(26); if (in == end) break;
    case 27: PA_PACK21_STEP(27); if (in == end) break;
    case 28: PA_PACK21_STEP(28); if (in == end) break;
    case 29: PA_PACK21_STEP(29); if (in == end) break;
    case 30: PA_PACK21_STEP(30); if (in == end) break;
    case 31: PA_PACK21_STEP(31); if (in == end) break;
  }
  assert(in == end);

  /* A partially filled last word: keep the destination bits above the
     last bit written and store the merged word. */
  if ((count * 21 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)21 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}

#undef PA_PACK21_STEP
#undef PA_PACK21_SHIFT
/*
 * __PackedArray_unpack_21 - read `count` 21-bit items from a packed stream.
 *
 *   in     - base of the packed buffer, addressed as 32-bit words. Item i
 *            occupies stream bits [i * 21, i * 21 + 21), lowest bit first.
 *   offset - index of the first item to read.
 *   out    - receives `count` values, one per uint32_t, each in [0, 2^21).
 *   count  - number of items to read. Zero is a no-op (the unguarded code
 *            would overrun `out` because the `out == end` test is only
 *            evaluated after `out` has already been advanced).
 *
 * The body is a Duff's device: `switch (offset)` jumps into the middle of a
 * 32-step unrolled loop so the first, possibly partial, group of 32 items
 * and all following full groups share the same code. Whatever is left
 * (fewer than 32 items) goes through a second switch that checks for the
 * end of output after every item. Every case falls through on purpose.
 */

/* Value mask for one item and the in-word bit position of item k. */
#define PA_UNPACK21_MASK ((uint32_t)((1ULL << 21) - 1))
#define PA_UNPACK21_SHIFT(k) (((k) * 21) % 32)

/*
 * Extract item k of a group. The condition is an integer constant
 * expression, so each expansion folds to one of two shapes:
 *   - item lies inside the current word: shift and mask;
 *   - item straddles two words: take the low part from the current word,
 *     load the next word, and splice in its low bits as the high part.
 * The `& 31` only keeps the (dead) shift well-defined when SHIFT(k) is 0.
 */
#define PA_UNPACK21_STEP(k)                                                  \
  do                                                                         \
  {                                                                          \
    if (PA_UNPACK21_SHIFT(k) + 21 <= 32)                                     \
    {                                                                        \
      *out++ = (packed >> PA_UNPACK21_SHIFT(k)) & PA_UNPACK21_MASK;          \
    }                                                                        \
    else                                                                     \
    {                                                                        \
      const uint32_t carry_ = (32 - PA_UNPACK21_SHIFT(k)) & 31;              \
      const uint32_t low_ = packed >> PA_UNPACK21_SHIFT(k);                  \
      uint32_t high_;                                                        \
      packed = *++in;                                                        \
      high_ = packed << carry_;                                              \
      *out++ = low_ ^ ((low_ ^ high_) & (PA_UNPACK21_MASK >> carry_ << carry_)); \
    }                                                                        \
  } while (0)

void __PackedArray_unpack_21(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to read: do not even touch *in, it may be past the buffer. */
  if (count == 0)
    return;

  /* Locate and load the word holding the first requested item. */
  in += ((uint64_t)offset * (uint64_t)21) / 32;
  packed = *in;
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* Only reached on loop-back: item 31 ended on a word boundary,
           so the next group starts in the following word. */
        packed = *++in;
        case 0: PA_UNPACK21_STEP(0);
        case 1: PA_UNPACK21_STEP(1);
        case 2: PA_UNPACK21_STEP(2);
        case 3: PA_UNPACK21_STEP(3);
        case 4: PA_UNPACK21_STEP(4);
        case 5: PA_UNPACK21_STEP(5);
        case 6: PA_UNPACK21_STEP(6);
        case 7: PA_UNPACK21_STEP(7);
        case 8: PA_UNPACK21_STEP(8);
        case 9: PA_UNPACK21_STEP(9);
        case 10: PA_UNPACK21_STEP(10);
        case 11: PA_UNPACK21_STEP(11);
        case 12: PA_UNPACK21_STEP(12);
        case 13: PA_UNPACK21_STEP(13);
        case 14: PA_UNPACK21_STEP(14);
        case 15: PA_UNPACK21_STEP(15);
        case 16: PA_UNPACK21_STEP(16);
        case 17: PA_UNPACK21_STEP(17);
        case 18: PA_UNPACK21_STEP(18);
        case 19: PA_UNPACK21_STEP(19);
        case 20: PA_UNPACK21_STEP(20);
        case 21: PA_UNPACK21_STEP(21);
        case 22: PA_UNPACK21_STEP(22);
        case 23: PA_UNPACK21_STEP(23);
        case 24: PA_UNPACK21_STEP(24);
        case 25: PA_UNPACK21_STEP(25);
        case 26: PA_UNPACK21_STEP(26);
        case 27: PA_UNPACK21_STEP(27);
        case 28: PA_UNPACK21_STEP(28);
        case 29: PA_UNPACK21_STEP(29);
        case 30: PA_UNPACK21_STEP(30);
        case 31: PA_UNPACK21_STEP(31);
      } while (--n > 0);
    }
    /* All requested items delivered: avoid loading a word we do not need. */
    if (count == 0)
      return;
    /* The remainder starts a fresh group in the next word. */
    packed = *++in;
    offset = 0;
  }

  /* Remaining items (fewer than a full group): stop as soon as out hits end. */
  end = out + count;
  switch (offset)
  {
    case 0: PA_UNPACK21_STEP(0); if (out == end) break;
    case 1: PA_UNPACK21_STEP(1); if (out == end) break;
    case 2: PA_UNPACK21_STEP(2); if (out == end) break;
    case 3: PA_UNPACK21_STEP(3); if (out == end) break;
    case 4: PA_UNPACK21_STEP(4); if (out == end) break;
    case 5: PA_UNPACK21_STEP(5); if (out == end) break;
    case 6: PA_UNPACK21_STEP(6); if (out == end) break;
    case 7: PA_UNPACK21_STEP(7); if (out == end) break;
    case 8: PA_UNPACK21_STEP(8); if (out == end) break;
    case 9: PA_UNPACK21_STEP(9); if (out == end) break;
    case 10: PA_UNPACK21_STEP(10); if (out == end) break;
    case 11: PA_UNPACK21_STEP(11); if (out == end) break;
    case 12: PA_UNPACK21_STEP(12); if (out == end) break;
    case 13: PA_UNPACK21_STEP(13); if (out == end) break;
    case 14: PA_UNPACK21_STEP(14); if (out == end) break;
    case 15: PA_UNPACK21_STEP(15); if (out == end) break;
    case 16: PA_UNPACK21_STEP(16); if (out == end) break;
    case 17: PA_UNPACK21_STEP(17); if (out == end) break;
    case 18: PA_UNPACK21_STEP(18); if (out == end) break;
    case 19: PA_UNPACK21_STEP(19); if (out == end) break;
    case 20: PA_UNPACK21_STEP(20); if (out == end) break;
    case 21: PA_UNPACK21_STEP(21); if (out == end) break;
    case 22: PA_UNPACK21_STEP(22); if (out == end) break;
    case 23: PA_UNPACK21_STEP(23); if (out == end) break;
    case 24: PA_UNPACK21_STEP(24); if (out == end) break;
    case 25: PA_UNPACK21_STEP(25); if (out == end) break;
    case 26: PA_UNPACK21_STEP(26); if (out == end) break;
    case 27: PA_UNPACK21_STEP(27); if (out == end) break;
    case 28: PA_UNPACK21_STEP(28); if (out == end) break;
    case 29: PA_UNPACK21_STEP(29); if (out == end) break;
    case 30: PA_UNPACK21_STEP(30); if (out == end) break;
    case 31: PA_UNPACK21_STEP(31); if (out == end) break;
  }
  assert(out == end);
}

#undef PA_UNPACK21_STEP
#undef PA_UNPACK21_SHIFT
#undef PA_UNPACK21_MASK
/*
 * __PackedArray_pack_22 - write `count` 22-bit items into a packed stream.
 *
 *   out    - base of the packed buffer, addressed as 32-bit words. Item i
 *            occupies stream bits [i * 22, i * 22 + 22), lowest bit first.
 *   offset - index of the first item to write.
 *   in     - `count` source values, one per uint32_t. Values are NOT masked:
 *            any bit set above bit 21 is OR-ed into neighbouring items.
 *   count  - number of items to write. Zero is a no-op (the unguarded code
 *            would run past both buffers because the `in == end` test is
 *            only evaluated after `in` has already been advanced).
 *
 * Bits of the first and last touched words that lie outside the written
 * range are preserved; words strictly in between are overwritten.
 *
 * The body is a Duff's device: `switch (offset)` jumps into the middle of a
 * 32-step unrolled loop so the first, possibly partial, group of 32 items
 * and all following full groups share the same code. Whatever is left
 * (fewer than 32 items) goes through a second switch that checks for the
 * end of input after every item. Every case falls through on purpose.
 */

/* Bit position, inside its 32-bit word, at which item k of a group starts. */
#define PA_PACK22_SHIFT(k) (((k) * 22) % 32)

/*
 * Emit item k of a group. All conditions are integer constant expressions,
 * so each expansion folds to exactly one of three shapes:
 *   - item fits with room to spare: OR it into the pending word;
 *   - item ends exactly on the word boundary (k = 15 and k = 31): OR,
 *     flush, restart from 0;
 *   - item straddles two words: OR the low part, flush, keep the high part.
 * The `& 31` only keeps the (dead) shift well-defined when SHIFT(k) is 0.
 */
#define PA_PACK22_STEP(k)                                                    \
  do                                                                         \
  {                                                                          \
    const uint32_t item_ = *in++;                                            \
    packed |= item_ << PA_PACK22_SHIFT(k);                                   \
    if (PA_PACK22_SHIFT(k) + 22 >= 32)                                       \
    {                                                                        \
      *out++ = packed;                                                       \
      packed = (PA_PACK22_SHIFT(k) + 22 > 32)                                \
        ? item_ >> ((32 - PA_PACK22_SHIFT(k)) & 31)                          \
        : 0;                                                                 \
    }                                                                        \
  } while (0)

void __PackedArray_pack_22(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Nothing to write: do not even read *out, it may be past the buffer. */
  if (count == 0)
    return;

  /* Locate the first destination word and keep the bits below startBit. */
  out += ((uint64_t)offset * (uint64_t)22) / 32;
  startBit = ((uint64_t)offset * (uint64_t)22) % 32;
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled body (first may be partial). */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0: PA_PACK22_STEP(0);
        case 1: PA_PACK22_STEP(1);
        case 2: PA_PACK22_STEP(2);
        case 3: PA_PACK22_STEP(3);
        case 4: PA_PACK22_STEP(4);
        case 5: PA_PACK22_STEP(5);
        case 6: PA_PACK22_STEP(6);
        case 7: PA_PACK22_STEP(7);
        case 8: PA_PACK22_STEP(8);
        case 9: PA_PACK22_STEP(9);
        case 10: PA_PACK22_STEP(10);
        case 11: PA_PACK22_STEP(11);
        case 12: PA_PACK22_STEP(12);
        case 13: PA_PACK22_STEP(13);
        case 14: PA_PACK22_STEP(14);
        case 15: PA_PACK22_STEP(15);
        case 16: PA_PACK22_STEP(16);
        case 17: PA_PACK22_STEP(17);
        case 18: PA_PACK22_STEP(18);
        case 19: PA_PACK22_STEP(19);
        case 20: PA_PACK22_STEP(20);
        case 21: PA_PACK22_STEP(21);
        case 22: PA_PACK22_STEP(22);
        case 23: PA_PACK22_STEP(23);
        case 24: PA_PACK22_STEP(24);
        case 25: PA_PACK22_STEP(25);
        case 26: PA_PACK22_STEP(26);
        case 27: PA_PACK22_STEP(27);
        case 28: PA_PACK22_STEP(28);
        case 29: PA_PACK22_STEP(29);
        case 30: PA_PACK22_STEP(30);
        case 31: PA_PACK22_STEP(31);
      } while (--n > 0);
    }
    /* Ended exactly on a word boundary: no partial word to merge. */
    if (count == 0)
      return;
    /* The remainder starts a fresh group on a fresh word. */
    offset = 0;
    startBit = 0;
  }

  /* Remaining items (fewer than a full group): stop as soon as in hits end. */
  end = in + count;
  switch (offset)
  {
    case 0: PA_PACK22_STEP(0); if (in == end) break;
    case 1: PA_PACK22_STEP(1); if (in == end) break;
    case 2: PA_PACK22_STEP(2); if (in == end) break;
    case 3: PA_PACK22_STEP(3); if (in == end) break;
    case 4: PA_PACK22_STEP(4); if (in == end) break;
    case 5: PA_PACK22_STEP(5); if (in == end) break;
    case 6: PA_PACK22_STEP(6); if (in == end) break;
    case 7: PA_PACK22_STEP(7); if (in == end) break;
    case 8: PA_PACK22_STEP(8); if (in == end) break;
    case 9: PA_PACK22_STEP(9); if (in == end) break;
    case 10: PA_PACK22_STEP(10); if (in == end) break;
    case 11: PA_PACK22_STEP(11); if (in == end) break;
    case 12: PA_PACK22_STEP(12); if (in == end) break;
    case 13: PA_PACK22_STEP(13); if (in == end) break;
    case 14: PA_PACK22_STEP(14); if (in == end) break;
    case 15: PA_PACK22_STEP(15); if (in == end) break;
    case 16: PA_PACK22_STEP(16); if (in == end) break;
    case 17: PA_PACK22_STEP(17); if (in == end) break;
    case 18: PA_PACK22_STEP(18); if (in == end) break;
    case 19: PA_PACK22_STEP(19); if (in == end) break;
    case 20: PA_PACK22_STEP(20); if (in == end) break;
    case 21: PA_PACK22_STEP(21); if (in == end) break;
    case 22: PA_PACK22_STEP(22); if (in == end) break;
    case 23: PA_PACK22_STEP(23); if (in == end) break;
    case 24: PA_PACK22_STEP(24); if (in == end) break;
    case 25: PA_PACK22_STEP(25); if (in == end) break;
    case 26: PA_PACK22_STEP(26); if (in == end) break;
    case 27: PA_PACK22_STEP(27); if (in == end) break;
    case 28: PA_PACK22_STEP(28); if (in == end) break;
    case 29: PA_PACK22_STEP(29); if (in == end) break;
    case 30: PA_PACK22_STEP(30); if (in == end) break;
    case 31: PA_PACK22_STEP(31); if (in == end) break;
  }
  assert(in == end);

  /* A partially filled last word: keep the destination bits above the
     last bit written and store the merged word. */
  if ((count * 22 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)22 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}

#undef PA_PACK22_STEP
#undef PA_PACK22_SHIFT
/**
 * Unpacks `count` consecutive 22-bit items from a packed stream of 32-bit
 * words.
 *
 * Item k of the stream occupies bits [k * 22, k * 22 + 22) counted from the
 * least significant bit of in[0]; an item may straddle two adjacent words.
 *
 * in     - packed words (read only).
 * offset - index, in items, of the first item to read.
 * out    - receives `count` values, each zero-extended from 22 bits.
 * count  - number of items to unpack; 0 is a no-op.
 *
 * The body is fully unrolled over the 32 possible positions an item can take
 * inside a 32-item group (32 items * 22 bits == 22 whole words), so every
 * shift amount is a compile-time constant.
 */
void __PackedArray_unpack_22(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* An empty request must touch neither buffer. Without this guard the tail
     switch below writes one item before its first `out == end` test, never
     matches `end` again and runs through every remaining case. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)22) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current 32-item group: enter the
       unrolled loop at position `offset` (switch into a do/while), then keep
       consuming whole groups. Every case falls through on purpose. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* the previous group ended exactly on a word boundary */
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 1:
        {
          /* item straddles two words: low bits come from the current word,
             high bits from the next one, merged with a masked xor */
          uint32_t low, high;
          low = packed >> ((1 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((1 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((1 * 22) % 32)) << (32 - ((1 * 22) % 32))));
        }
        case 2:
        {
          uint32_t low, high;
          low = packed >> ((2 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((2 * 22) % 32)) << (32 - ((2 * 22) % 32))));
        }
        case 3:
          *out++ = (packed >> ((3 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 4:
        {
          uint32_t low, high;
          low = packed >> ((4 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((4 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((4 * 22) % 32)) << (32 - ((4 * 22) % 32))));
        }
        case 5:
        {
          uint32_t low, high;
          low = packed >> ((5 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((5 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((5 * 22) % 32)) << (32 - ((5 * 22) % 32))));
        }
        case 6:
          *out++ = (packed >> ((6 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 7:
        {
          uint32_t low, high;
          low = packed >> ((7 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((7 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((7 * 22) % 32)) << (32 - ((7 * 22) % 32))));
        }
        case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((8 * 22) % 32)) << (32 - ((8 * 22) % 32))));
        }
        case 9:
          *out++ = (packed >> ((9 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 10:
        {
          uint32_t low, high;
          low = packed >> ((10 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((10 * 22) % 32)) << (32 - ((10 * 22) % 32))));
        }
        case 11:
        {
          uint32_t low, high;
          low = packed >> ((11 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((11 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((11 * 22) % 32)) << (32 - ((11 * 22) % 32))));
        }
        case 12:
          *out++ = (packed >> ((12 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 13:
        {
          uint32_t low, high;
          low = packed >> ((13 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((13 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((13 * 22) % 32)) << (32 - ((13 * 22) % 32))));
        }
        case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((14 * 22) % 32)) << (32 - ((14 * 22) % 32))));
        }
        case 15:
          *out++ = (packed >> ((15 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
          /* item 15 ends exactly on a word boundary: load the next word */
          packed = *++in;
        case 16:
          *out++ = (packed >> ((16 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((17 * 22) % 32)) << (32 - ((17 * 22) % 32))));
        }
        case 18:
        {
          uint32_t low, high;
          low = packed >> ((18 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((18 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((18 * 22) % 32)) << (32 - ((18 * 22) % 32))));
        }
        case 19:
          *out++ = (packed >> ((19 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 20:
        {
          uint32_t low, high;
          low = packed >> ((20 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((20 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((20 * 22) % 32)) << (32 - ((20 * 22) % 32))));
        }
        case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((21 * 22) % 32)) << (32 - ((21 * 22) % 32))));
        }
        case 22:
          *out++ = (packed >> ((22 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((23 * 22) % 32)) << (32 - ((23 * 22) % 32))));
        }
        case 24:
        {
          uint32_t low, high;
          low = packed >> ((24 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((24 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((24 * 22) % 32)) << (32 - ((24 * 22) % 32))));
        }
        case 25:
          *out++ = (packed >> ((25 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 26:
        {
          uint32_t low, high;
          low = packed >> ((26 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((26 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((26 * 22) % 32)) << (32 - ((26 * 22) % 32))));
        }
        case 27:
        {
          uint32_t low, high;
          low = packed >> ((27 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((27 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((27 * 22) % 32)) << (32 - ((27 * 22) % 32))));
        }
        case 28:
          *out++ = (packed >> ((28 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
        case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((29 * 22) % 32)) << (32 - ((29 * 22) % 32))));
        }
        case 30:
        {
          uint32_t low, high;
          low = packed >> ((30 * 22) % 32);
          packed = *++in;
          high = packed << (32 - ((30 * 22) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((30 * 22) % 32)) << (32 - ((30 * 22) % 32))));
        }
        case 31:
          *out++ = (packed >> ((31 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* leftover items start a fresh group on the next word */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer items than remain in the current group. Same unrolled
     sequence, but stop as soon as `count` items have been produced. Every
     case falls through on purpose unless the end has been reached. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((1 * 22) % 32)) << (32 - ((1 * 22) % 32))));
    }
      if (out == end) break;
    case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((2 * 22) % 32)) << (32 - ((2 * 22) % 32))));
    }
      if (out == end) break;
    case 3:
      *out++ = (packed >> ((3 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((4 * 22) % 32)) << (32 - ((4 * 22) % 32))));
    }
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((5 * 22) % 32)) << (32 - ((5 * 22) % 32))));
    }
      if (out == end) break;
    case 6:
      *out++ = (packed >> ((6 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 7:
    {
      uint32_t low, high;
      low = packed >> ((7 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((7 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((7 * 22) % 32)) << (32 - ((7 * 22) % 32))));
    }
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((8 * 22) % 32)) << (32 - ((8 * 22) % 32))));
    }
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((10 * 22) % 32)) << (32 - ((10 * 22) % 32))));
    }
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((11 * 22) % 32)) << (32 - ((11 * 22) % 32))));
    }
      if (out == end) break;
    case 12:
      *out++ = (packed >> ((12 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 13:
    {
      uint32_t low, high;
      low = packed >> ((13 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((13 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((13 * 22) % 32)) << (32 - ((13 * 22) % 32))));
    }
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((14 * 22) % 32)) << (32 - ((14 * 22) % 32))));
    }
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
      /* only load the next word when another item actually follows */
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((17 * 22) % 32)) << (32 - ((17 * 22) % 32))));
    }
      if (out == end) break;
    case 18:
    {
      uint32_t low, high;
      low = packed >> ((18 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((18 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((18 * 22) % 32)) << (32 - ((18 * 22) % 32))));
    }
      if (out == end) break;
    case 19:
      *out++ = (packed >> ((19 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((20 * 22) % 32)) << (32 - ((20 * 22) % 32))));
    }
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((21 * 22) % 32)) << (32 - ((21 * 22) % 32))));
    }
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((23 * 22) % 32)) << (32 - ((23 * 22) % 32))));
    }
      if (out == end) break;
    case 24:
    {
      uint32_t low, high;
      low = packed >> ((24 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((24 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((24 * 22) % 32)) << (32 - ((24 * 22) % 32))));
    }
      if (out == end) break;
    case 25:
      *out++ = (packed >> ((25 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((26 * 22) % 32)) << (32 - ((26 * 22) % 32))));
    }
      if (out == end) break;
    case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((27 * 22) % 32)) << (32 - ((27 * 22) % 32))));
    }
      if (out == end) break;
    case 28:
      *out++ = (packed >> ((28 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
    case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((29 * 22) % 32)) << (32 - ((29 * 22) % 32))));
    }
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 22) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 22) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 22) - 1) >> (32 - ((30 * 22) % 32)) << (32 - ((30 * 22) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 22) % 32)) & (uint32_t)((1ULL << 22) - 1);
      if (out == end) break;
  }
  assert(out == end);
}
/**
 * Packs `count` 23-bit items into a packed stream of 32-bit words, starting
 * at item index `offset`.
 *
 * Item k of the stream occupies bits [k * 23, k * 23 + 23) counted from the
 * least significant bit of out[0]; an item may straddle two adjacent words.
 * Bits of the first and last touched words that lie outside the written items
 * are preserved.
 *
 * out    - packed destination words (read and written).
 * offset - index, in items, of the first item to write.
 * in     - `count` source values. They are NOT masked here: any bit set above
 *          bit 22 is or-ed into the neighbouring item.
 * count  - number of items to pack; 0 is a no-op.
 *
 * The body is fully unrolled over the 32 possible positions an item can take
 * inside a 32-item group (32 items * 23 bits == 23 whole words), so every
 * shift amount is a compile-time constant.
 */
void __PackedArray_pack_23(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* An empty request must touch neither buffer. Without this guard the tail
     switch below consumes one item before its first `in == end` test, never
     matches `end` again and runs through every remaining case. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)23) / 32;
  startBit = ((uint64_t)offset * (uint64_t)23) % 32;
  /* keep the destination bits that sit below the first item */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current 32-item group: enter the
       unrolled loop at position `offset` (switch into a do/while), then keep
       emitting whole groups. Every case falls through on purpose. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 23) % 32);
        case 1:
          /* item straddles two words: finish the current word, then start
             the next one with the item's remaining high bits */
          packed |= *in << ((1 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 23) % 32));
        case 2:
          packed |= *in << ((2 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((2 * 23) % 32));
        case 3:
          packed |= *in++ << ((3 * 23) % 32);
        case 4:
          packed |= *in << ((4 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((4 * 23) % 32));
        case 5:
          packed |= *in << ((5 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((5 * 23) % 32));
        case 6:
          packed |= *in << ((6 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 23) % 32));
        case 7:
          packed |= *in++ << ((7 * 23) % 32);
        case 8:
          packed |= *in << ((8 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 23) % 32));
        case 9:
          packed |= *in << ((9 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((9 * 23) % 32));
        case 10:
          packed |= *in++ << ((10 * 23) % 32);
        case 11:
          packed |= *in << ((11 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((11 * 23) % 32));
        case 12:
          packed |= *in << ((12 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 23) % 32));
        case 13:
          packed |= *in << ((13 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((13 * 23) % 32));
        case 14:
          packed |= *in++ << ((14 * 23) % 32);
        case 15:
          packed |= *in << ((15 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((15 * 23) % 32));
        case 16:
          packed |= *in << ((16 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((16 * 23) % 32));
        case 17:
          packed |= *in++ << ((17 * 23) % 32);
        case 18:
          packed |= *in << ((18 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((18 * 23) % 32));
        case 19:
          packed |= *in << ((19 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 23) % 32));
        case 20:
          packed |= *in << ((20 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((20 * 23) % 32));
        case 21:
          packed |= *in++ << ((21 * 23) % 32);
        case 22:
          packed |= *in << ((22 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((22 * 23) % 32));
        case 23:
          packed |= *in << ((23 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 23) % 32));
        case 24:
          packed |= *in++ << ((24 * 23) % 32);
        case 25:
          packed |= *in << ((25 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 23) % 32));
        case 26:
          packed |= *in << ((26 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((26 * 23) % 32));
        case 27:
          packed |= *in << ((27 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((27 * 23) % 32));
        case 28:
          packed |= *in++ << ((28 * 23) % 32);
        case 29:
          packed |= *in << ((29 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((29 * 23) % 32));
        case 30:
          packed |= *in << ((30 * 23) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 23) % 32));
        case 31:
          /* last item of the group ends exactly on a word boundary */
          packed |= *in++ << ((31 * 23) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* leftover items start a fresh group on a fresh word */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer items than remain in the current group. Same unrolled
     sequence, but stop as soon as `count` items have been consumed. Every
     case falls through on purpose unless the end has been reached. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 23) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 23) % 32));
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 23) % 32));
      if (in == end) break;
    case 3:
      packed |= *in++ << ((3 * 23) % 32);
      if (in == end) break;
    case 4:
      packed |= *in << ((4 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((4 * 23) % 32));
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 23) % 32));
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 23) % 32));
      if (in == end) break;
    case 7:
      packed |= *in++ << ((7 * 23) % 32);
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 23) % 32));
      if (in == end) break;
    case 9:
      packed |= *in << ((9 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((9 * 23) % 32));
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 23) % 32);
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 23) % 32));
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 23) % 32));
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 23) % 32));
      if (in == end) break;
    case 14:
      packed |= *in++ << ((14 * 23) % 32);
      if (in == end) break;
    case 15:
      packed |= *in << ((15 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((15 * 23) % 32));
      if (in == end) break;
    case 16:
      packed |= *in << ((16 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((16 * 23) % 32));
      if (in == end) break;
    case 17:
      packed |= *in++ << ((17 * 23) % 32);
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 23) % 32));
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 23) % 32));
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 23) % 32));
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 23) % 32);
      if (in == end) break;
    case 22:
      packed |= *in << ((22 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((22 * 23) % 32));
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 23) % 32));
      if (in == end) break;
    case 24:
      packed |= *in++ << ((24 * 23) % 32);
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 23) % 32));
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 23) % 32));
      if (in == end) break;
    case 27:
      packed |= *in << ((27 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((27 * 23) % 32));
      if (in == end) break;
    case 28:
      packed |= *in++ << ((28 * 23) % 32);
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 23) % 32));
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 23) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 23) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 23) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);

  /* The last item ended in the middle of a word: flush the pending bits and
     keep the destination bits that sit above the last written item. */
  if ((count * 23 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)23 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
void __PackedArray_unpack_23(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t packed; | |
const uint32_t* __restrict end; | |
in += ((uint64_t)offset * (uint64_t)23) / 32; | |
packed = *in; | |
offset = offset % 32; | |
if (count >= 32 - offset) | |
{ | |
int32_t n; | |
n = (count + offset) / 32; | |
count -= 32 * n - offset; | |
switch (offset) | |
{ | |
do | |
{ | |
packed = *++in; | |
case 0: | |
*out++ = (packed >> ((0 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 1: | |
{ | |
uint32_t low, high; | |
low = packed >> ((1 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((1 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((1 * 23) % 32)) << (32 - ((1 * 23) % 32)))); | |
} | |
case 2: | |
{ | |
uint32_t low, high; | |
low = packed >> ((2 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((2 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((2 * 23) % 32)) << (32 - ((2 * 23) % 32)))); | |
} | |
case 3: | |
*out++ = (packed >> ((3 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 4: | |
{ | |
uint32_t low, high; | |
low = packed >> ((4 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((4 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((4 * 23) % 32)) << (32 - ((4 * 23) % 32)))); | |
} | |
case 5: | |
{ | |
uint32_t low, high; | |
low = packed >> ((5 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((5 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((5 * 23) % 32)) << (32 - ((5 * 23) % 32)))); | |
} | |
case 6: | |
{ | |
uint32_t low, high; | |
low = packed >> ((6 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((6 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((6 * 23) % 32)) << (32 - ((6 * 23) % 32)))); | |
} | |
case 7: | |
*out++ = (packed >> ((7 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 8: | |
{ | |
uint32_t low, high; | |
low = packed >> ((8 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((8 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((8 * 23) % 32)) << (32 - ((8 * 23) % 32)))); | |
} | |
case 9: | |
{ | |
uint32_t low, high; | |
low = packed >> ((9 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((9 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((9 * 23) % 32)) << (32 - ((9 * 23) % 32)))); | |
} | |
case 10: | |
*out++ = (packed >> ((10 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 11: | |
{ | |
uint32_t low, high; | |
low = packed >> ((11 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((11 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((11 * 23) % 32)) << (32 - ((11 * 23) % 32)))); | |
} | |
case 12: | |
{ | |
uint32_t low, high; | |
low = packed >> ((12 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((12 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((12 * 23) % 32)) << (32 - ((12 * 23) % 32)))); | |
} | |
case 13: | |
{ | |
uint32_t low, high; | |
low = packed >> ((13 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((13 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((13 * 23) % 32)) << (32 - ((13 * 23) % 32)))); | |
} | |
case 14: | |
*out++ = (packed >> ((14 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 15: | |
{ | |
uint32_t low, high; | |
low = packed >> ((15 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((15 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((15 * 23) % 32)) << (32 - ((15 * 23) % 32)))); | |
} | |
case 16: | |
{ | |
uint32_t low, high; | |
low = packed >> ((16 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((16 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((16 * 23) % 32)) << (32 - ((16 * 23) % 32)))); | |
} | |
case 17: | |
*out++ = (packed >> ((17 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 18: | |
{ | |
uint32_t low, high; | |
low = packed >> ((18 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((18 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((18 * 23) % 32)) << (32 - ((18 * 23) % 32)))); | |
} | |
case 19: | |
{ | |
uint32_t low, high; | |
low = packed >> ((19 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((19 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((19 * 23) % 32)) << (32 - ((19 * 23) % 32)))); | |
} | |
case 20: | |
{ | |
uint32_t low, high; | |
low = packed >> ((20 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((20 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((20 * 23) % 32)) << (32 - ((20 * 23) % 32)))); | |
} | |
case 21: | |
*out++ = (packed >> ((21 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 22: | |
{ | |
uint32_t low, high; | |
low = packed >> ((22 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((22 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((22 * 23) % 32)) << (32 - ((22 * 23) % 32)))); | |
} | |
case 23: | |
{ | |
uint32_t low, high; | |
low = packed >> ((23 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((23 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((23 * 23) % 32)) << (32 - ((23 * 23) % 32)))); | |
} | |
case 24: | |
*out++ = (packed >> ((24 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 25: | |
{ | |
uint32_t low, high; | |
low = packed >> ((25 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((25 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((25 * 23) % 32)) << (32 - ((25 * 23) % 32)))); | |
} | |
case 26: | |
{ | |
uint32_t low, high; | |
low = packed >> ((26 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((26 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((26 * 23) % 32)) << (32 - ((26 * 23) % 32)))); | |
} | |
case 27: | |
{ | |
uint32_t low, high; | |
low = packed >> ((27 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((27 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((27 * 23) % 32)) << (32 - ((27 * 23) % 32)))); | |
} | |
case 28: | |
*out++ = (packed >> ((28 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
case 29: | |
{ | |
uint32_t low, high; | |
low = packed >> ((29 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((29 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((29 * 23) % 32)) << (32 - ((29 * 23) % 32)))); | |
} | |
case 30: | |
{ | |
uint32_t low, high; | |
low = packed >> ((30 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((30 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((30 * 23) % 32)) << (32 - ((30 * 23) % 32)))); | |
} | |
case 31: | |
*out++ = (packed >> ((31 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
} while (--n > 0); | |
} | |
if (count == 0) | |
return; | |
packed = *++in; | |
offset = 0; | |
} | |
end = out + count; | |
switch (offset) | |
{ | |
case 0: | |
*out++ = (packed >> ((0 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 1: | |
{ | |
uint32_t low, high; | |
low = packed >> ((1 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((1 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((1 * 23) % 32)) << (32 - ((1 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 2: | |
{ | |
uint32_t low, high; | |
low = packed >> ((2 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((2 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((2 * 23) % 32)) << (32 - ((2 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 3: | |
*out++ = (packed >> ((3 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 4: | |
{ | |
uint32_t low, high; | |
low = packed >> ((4 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((4 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((4 * 23) % 32)) << (32 - ((4 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 5: | |
{ | |
uint32_t low, high; | |
low = packed >> ((5 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((5 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((5 * 23) % 32)) << (32 - ((5 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 6: | |
{ | |
uint32_t low, high; | |
low = packed >> ((6 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((6 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((6 * 23) % 32)) << (32 - ((6 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 7: | |
*out++ = (packed >> ((7 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 8: | |
{ | |
uint32_t low, high; | |
low = packed >> ((8 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((8 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((8 * 23) % 32)) << (32 - ((8 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 9: | |
{ | |
uint32_t low, high; | |
low = packed >> ((9 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((9 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((9 * 23) % 32)) << (32 - ((9 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 10: | |
*out++ = (packed >> ((10 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 11: | |
{ | |
uint32_t low, high; | |
low = packed >> ((11 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((11 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((11 * 23) % 32)) << (32 - ((11 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 12: | |
{ | |
uint32_t low, high; | |
low = packed >> ((12 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((12 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((12 * 23) % 32)) << (32 - ((12 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 13: | |
{ | |
uint32_t low, high; | |
low = packed >> ((13 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((13 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((13 * 23) % 32)) << (32 - ((13 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 14: | |
*out++ = (packed >> ((14 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 15: | |
{ | |
uint32_t low, high; | |
low = packed >> ((15 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((15 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((15 * 23) % 32)) << (32 - ((15 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 16: | |
{ | |
uint32_t low, high; | |
low = packed >> ((16 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((16 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((16 * 23) % 32)) << (32 - ((16 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 17: | |
*out++ = (packed >> ((17 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 18: | |
{ | |
uint32_t low, high; | |
low = packed >> ((18 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((18 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((18 * 23) % 32)) << (32 - ((18 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 19: | |
{ | |
uint32_t low, high; | |
low = packed >> ((19 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((19 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((19 * 23) % 32)) << (32 - ((19 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 20: | |
{ | |
uint32_t low, high; | |
low = packed >> ((20 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((20 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((20 * 23) % 32)) << (32 - ((20 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 21: | |
*out++ = (packed >> ((21 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 22: | |
{ | |
uint32_t low, high; | |
low = packed >> ((22 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((22 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((22 * 23) % 32)) << (32 - ((22 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 23: | |
{ | |
uint32_t low, high; | |
low = packed >> ((23 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((23 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((23 * 23) % 32)) << (32 - ((23 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 24: | |
*out++ = (packed >> ((24 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 25: | |
{ | |
uint32_t low, high; | |
low = packed >> ((25 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((25 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((25 * 23) % 32)) << (32 - ((25 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 26: | |
{ | |
uint32_t low, high; | |
low = packed >> ((26 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((26 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((26 * 23) % 32)) << (32 - ((26 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 27: | |
{ | |
uint32_t low, high; | |
low = packed >> ((27 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((27 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((27 * 23) % 32)) << (32 - ((27 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 28: | |
*out++ = (packed >> ((28 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
case 29: | |
{ | |
uint32_t low, high; | |
low = packed >> ((29 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((29 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((29 * 23) % 32)) << (32 - ((29 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 30: | |
{ | |
uint32_t low, high; | |
low = packed >> ((30 * 23) % 32); | |
packed = *++in; | |
high = packed << (32 - ((30 * 23) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 23) - 1) >> (32 - ((30 * 23) % 32)) << (32 - ((30 * 23) % 32)))); | |
} | |
if (out == end) break; | |
case 31: | |
*out++ = (packed >> ((31 * 23) % 32)) & (uint32_t)((1ULL << 23) - 1); | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
/*
 * Packs `count` items from `in` into the bit stream at `out`, 24 bits per
 * item, starting at item index `offset` (i.e. at bit `offset * 24`).
 *
 * Layout: items are stored back to back, least-significant bit first, inside
 * consecutive 32-bit words; an item may straddle two words.
 *
 * - out:    base of the packed buffer; only the words covered by the items
 *           are written.
 * - offset: index of the first item to write.
 * - in:     source values. They are NOT masked here, so any bit set above
 *           bit 23 is OR-ed into the neighbouring item.
 * - count:  number of items to write; 0 is a no-op.
 *
 * Bits of the first word that lie below the first item and bits of the last
 * word that lie above the last item keep their previous value.
 */
void __PackedArray_pack_24(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bits = 24u;
  const uint32_t wordBits = 32u;
  uint32_t bit;     /* bit position inside *out where the next item starts */
  uint32_t packed;  /* word currently being assembled */
  const uint32_t* stop;

  /* Nothing to write: leave the buffer untouched instead of running past it. */
  if (count == 0)
  {
    return;
  }

  out += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);

  /* Keep whatever already sits below the first item in the first word. */
  packed = *out & (uint32_t)((1ULL << bit) - 1);

  for (stop = in + count; in != stop; ++in)
  {
    const uint32_t value = *in;

    packed |= value << bit;
    bit += bits;

    if (bit >= wordBits)
    {
      /* The current word is complete: flush it. */
      *out++ = packed;
      bit -= wordBits;

      /* Carry the part of the item that did not fit (none when the item
         ended exactly on the word boundary). */
      if (bit != 0)
      {
        packed = value >> (bits - bit);
      }
      else
      {
        packed = 0;
      }
    }
  }

  /* Partially filled last word: merge with the bits already stored above
     the end of the last item. */
  if (bit != 0)
  {
    packed |= *out & ~(((uint32_t)1 << bit) - 1u);
    *out = packed;
  }
}
/*
 * Extracts `count` items of 24 bits each from the bit stream at `in`,
 * starting at item index `offset` (i.e. at bit `offset * 24`), and stores
 * them, zero-extended, into `out`.
 *
 * Layout: items are stored back to back, least-significant bit first, inside
 * consecutive 32-bit words; an item may straddle two words.
 *
 * - in:     base of the packed buffer.
 * - offset: index of the first item to read.
 * - out:    destination for `count` unpacked values.
 * - count:  number of items to read; 0 is a no-op.
 *
 * A source word is only loaded when an item that still has to be produced
 * needs it, so nothing past the word holding the last item is read.
 */
void __PackedArray_unpack_24(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t bits = 24u;
  const uint32_t wordBits = 32u;
  const uint32_t mask = (uint32_t)((1ULL << 24) - 1);
  uint32_t bit;   /* bit position inside `word` where the next item starts */
  uint32_t word;  /* packed word currently being consumed */
  uint32_t* stop;

  /* Nothing to read: do not touch `in` or `out`. */
  if (count == 0)
  {
    return;
  }

  in += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);
  word = *in;
  stop = out + count;

  for (;;)
  {
    uint32_t value = word >> bit;

    bit += bits;
    if (bit > wordBits)
    {
      /* The item straddles two words: fetch the next one and splice its
         low bits above the ones already taken. */
      bit -= wordBits;
      word = *++in;
      value |= word << (bits - bit);
    }

    *out++ = value & mask;
    if (out == stop)
    {
      break;
    }

    /* The item ended exactly on a word boundary and more items follow:
       only now is the next word needed. */
    if (bit == wordBits)
    {
      word = *++in;
      bit = 0;
    }
  }
}
/*
 * Packs `count` items from `in` into the bit stream at `out`, 25 bits per
 * item, starting at item index `offset` (i.e. at bit `offset * 25`).
 *
 * Layout: items are stored back to back, least-significant bit first, inside
 * consecutive 32-bit words; an item may straddle two words.
 *
 * - out:    base of the packed buffer; only the words covered by the items
 *           are written.
 * - offset: index of the first item to write.
 * - in:     source values. They are NOT masked here, so any bit set above
 *           bit 24 is OR-ed into the neighbouring item.
 * - count:  number of items to write; 0 is a no-op.
 *
 * Bits of the first word that lie below the first item and bits of the last
 * word that lie above the last item keep their previous value.
 */
void __PackedArray_pack_25(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bits = 25u;
  const uint32_t wordBits = 32u;
  uint32_t bit;     /* bit position inside *out where the next item starts */
  uint32_t packed;  /* word currently being assembled */
  const uint32_t* stop;

  /* Nothing to write: leave the buffer untouched instead of running past it. */
  if (count == 0)
  {
    return;
  }

  out += ((uint64_t)offset * (uint64_t)bits) / wordBits;
  bit = (uint32_t)(((uint64_t)offset * (uint64_t)bits) % wordBits);

  /* Keep whatever already sits below the first item in the first word. */
  packed = *out & (uint32_t)((1ULL << bit) - 1);

  for (stop = in + count; in != stop; ++in)
  {
    const uint32_t value = *in;

    packed |= value << bit;
    bit += bits;

    if (bit >= wordBits)
    {
      /* The current word is complete: flush it. */
      *out++ = packed;
      bit -= wordBits;

      /* Carry the part of the item that did not fit (none when the item
         ended exactly on the word boundary). */
      if (bit != 0)
      {
        packed = value >> (bits - bit);
      }
      else
      {
        packed = 0;
      }
    }
  }

  /* Partially filled last word: merge with the bits already stored above
     the end of the last item. */
  if (bit != 0)
  {
    packed |= *out & ~(((uint32_t)1 << bit) - 1u);
    *out = packed;
  }
}
/*
 * Unpacks `count` consecutive 25-bit items, starting at item index `offset`,
 * from the packed 32-bit word stream `in` into `out` (one item per uint32_t).
 *
 * Layout, as read by the code below: item k starts at bit (k * 25) % 32 of
 * word (k * 25) / 32, least significant bit first.  An item whose 25 bits do
 * not fit in the remainder of its word continues in the low bits of the
 * following word.
 *
 * The body is fully unrolled over the 32 possible values of (item index % 32).
 * The first switch jumps into the middle of a do/while loop (all case labels
 * fall through on purpose) and handles the head plus every complete run up to
 * a multiple of 32 items; the second switch handles the remaining tail and
 * leaves as soon as `count` items have been produced.
 *
 * in     - packed source words (only read)
 * offset - index of the first item to unpack
 * out    - destination array receiving `count` values
 * count  - number of items to unpack; 0 is a no-op
 */
void __PackedArray_unpack_25(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch would store an item before testing
     `out == end`, run past `end` and keep writing through every case. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)25) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled loop (the first one may be
       entered part-way); count becomes the number of tail items left. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* item 31 of the previous pass ended exactly on a word boundary */
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 1:
        {
          uint32_t low, high;
          low = packed >> ((1 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((1 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((1 * 25) % 32)) << (32 - ((1 * 25) % 32))));
        }
        case 2:
        {
          uint32_t low, high;
          low = packed >> ((2 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((2 * 25) % 32)) << (32 - ((2 * 25) % 32))));
        }
        case 3:
        {
          uint32_t low, high;
          low = packed >> ((3 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((3 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((3 * 25) % 32)) << (32 - ((3 * 25) % 32))));
        }
        case 4:
          *out++ = (packed >> ((4 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 5:
        {
          uint32_t low, high;
          low = packed >> ((5 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((5 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((5 * 25) % 32)) << (32 - ((5 * 25) % 32))));
        }
        case 6:
        {
          uint32_t low, high;
          low = packed >> ((6 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((6 * 25) % 32)) << (32 - ((6 * 25) % 32))));
        }
        case 7:
        {
          uint32_t low, high;
          low = packed >> ((7 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((7 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((7 * 25) % 32)) << (32 - ((7 * 25) % 32))));
        }
        case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((8 * 25) % 32)) << (32 - ((8 * 25) % 32))));
        }
        case 9:
          *out++ = (packed >> ((9 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 10:
        {
          uint32_t low, high;
          low = packed >> ((10 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((10 * 25) % 32)) << (32 - ((10 * 25) % 32))));
        }
        case 11:
        {
          uint32_t low, high;
          low = packed >> ((11 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((11 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((11 * 25) % 32)) << (32 - ((11 * 25) % 32))));
        }
        case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((12 * 25) % 32)) << (32 - ((12 * 25) % 32))));
        }
        case 13:
          *out++ = (packed >> ((13 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((14 * 25) % 32)) << (32 - ((14 * 25) % 32))));
        }
        case 15:
        {
          uint32_t low, high;
          low = packed >> ((15 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((15 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((15 * 25) % 32)) << (32 - ((15 * 25) % 32))));
        }
        case 16:
        {
          uint32_t low, high;
          low = packed >> ((16 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((16 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((16 * 25) % 32)) << (32 - ((16 * 25) % 32))));
        }
        case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((17 * 25) % 32)) << (32 - ((17 * 25) % 32))));
        }
        case 18:
          *out++ = (packed >> ((18 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((19 * 25) % 32)) << (32 - ((19 * 25) % 32))));
        }
        case 20:
        {
          uint32_t low, high;
          low = packed >> ((20 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((20 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((20 * 25) % 32)) << (32 - ((20 * 25) % 32))));
        }
        case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((21 * 25) % 32)) << (32 - ((21 * 25) % 32))));
        }
        case 22:
          *out++ = (packed >> ((22 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((23 * 25) % 32)) << (32 - ((23 * 25) % 32))));
        }
        case 24:
        {
          uint32_t low, high;
          low = packed >> ((24 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((24 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((24 * 25) % 32)) << (32 - ((24 * 25) % 32))));
        }
        case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((25 * 25) % 32)) << (32 - ((25 * 25) % 32))));
        }
        case 26:
        {
          uint32_t low, high;
          low = packed >> ((26 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((26 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((26 * 25) % 32)) << (32 - ((26 * 25) % 32))));
        }
        case 27:
          *out++ = (packed >> ((27 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
        case 28:
        {
          uint32_t low, high;
          low = packed >> ((28 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((28 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((28 * 25) % 32)) << (32 - ((28 * 25) % 32))));
        }
        case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((29 * 25) % 32)) << (32 - ((29 * 25) % 32))));
        }
        case 30:
        {
          uint32_t low, high;
          low = packed >> ((30 * 25) % 32);
          packed = *++in;
          high = packed << (32 - ((30 * 25) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((30 * 25) % 32)) << (32 - ((30 * 25) % 32))));
        }
        case 31:
          *out++ = (packed >> ((31 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* tail items start at the beginning of a fresh word */
    packed = *++in;
    offset = 0;
  }
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((1 * 25) % 32)) << (32 - ((1 * 25) % 32))));
    }
      if (out == end) break;
    case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((2 * 25) % 32)) << (32 - ((2 * 25) % 32))));
    }
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((3 * 25) % 32)) << (32 - ((3 * 25) % 32))));
    }
      if (out == end) break;
    case 4:
      *out++ = (packed >> ((4 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((5 * 25) % 32)) << (32 - ((5 * 25) % 32))));
    }
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((6 * 25) % 32)) << (32 - ((6 * 25) % 32))));
    }
      if (out == end) break;
    case 7:
    {
      uint32_t low, high;
      low = packed >> ((7 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((7 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((7 * 25) % 32)) << (32 - ((7 * 25) % 32))));
    }
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((8 * 25) % 32)) << (32 - ((8 * 25) % 32))));
    }
      if (out == end) break;
    case 9:
      *out++ = (packed >> ((9 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((10 * 25) % 32)) << (32 - ((10 * 25) % 32))));
    }
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((11 * 25) % 32)) << (32 - ((11 * 25) % 32))));
    }
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((12 * 25) % 32)) << (32 - ((12 * 25) % 32))));
    }
      if (out == end) break;
    case 13:
      *out++ = (packed >> ((13 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((14 * 25) % 32)) << (32 - ((14 * 25) % 32))));
    }
      if (out == end) break;
    case 15:
    {
      uint32_t low, high;
      low = packed >> ((15 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((15 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((15 * 25) % 32)) << (32 - ((15 * 25) % 32))));
    }
      if (out == end) break;
    case 16:
    {
      uint32_t low, high;
      low = packed >> ((16 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((16 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((16 * 25) % 32)) << (32 - ((16 * 25) % 32))));
    }
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((17 * 25) % 32)) << (32 - ((17 * 25) % 32))));
    }
      if (out == end) break;
    case 18:
      *out++ = (packed >> ((18 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((19 * 25) % 32)) << (32 - ((19 * 25) % 32))));
    }
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((20 * 25) % 32)) << (32 - ((20 * 25) % 32))));
    }
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((21 * 25) % 32)) << (32 - ((21 * 25) % 32))));
    }
      if (out == end) break;
    case 22:
      *out++ = (packed >> ((22 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((23 * 25) % 32)) << (32 - ((23 * 25) % 32))));
    }
      if (out == end) break;
    case 24:
    {
      uint32_t low, high;
      low = packed >> ((24 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((24 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((24 * 25) % 32)) << (32 - ((24 * 25) % 32))));
    }
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((25 * 25) % 32)) << (32 - ((25 * 25) % 32))));
    }
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((26 * 25) % 32)) << (32 - ((26 * 25) % 32))));
    }
      if (out == end) break;
    case 27:
      *out++ = (packed >> ((27 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((28 * 25) % 32)) << (32 - ((28 * 25) % 32))));
    }
      if (out == end) break;
    case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((29 * 25) % 32)) << (32 - ((29 * 25) % 32))));
    }
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 25) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 25) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 25) - 1) >> (32 - ((30 * 25) % 32)) << (32 - ((30 * 25) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 25) % 32)) & (uint32_t)((1ULL << 25) - 1);
      if (out == end) break;
  }
  /* sanity check: exactly `count` tail items were produced */
  assert(out == end);
}
/*
 * Packs `count` values from `in` as consecutive 26-bit items into the 32-bit
 * word stream `out`, starting at item index `offset`.
 *
 * Layout, as written by the code below: item k starts at bit (k * 26) % 32 of
 * word (k * 26) / 32, least significant bit first; an item that does not fit
 * in the remainder of its word continues in the low bits of the next word.
 *
 * Bits of the first word below the first item and bits of the last word above
 * the last item are read back from `out` and preserved.  Input values are
 * OR-ed in without masking, so callers are expected to pass values that fit in
 * 26 bits.
 *
 * The body is fully unrolled over the 32 possible values of (item index % 32).
 * The first switch jumps into the middle of a do/while loop (all case labels
 * fall through on purpose) and handles the head plus every complete run up to
 * a multiple of 32 items; the second switch handles the remaining tail and
 * leaves as soon as `count` inputs have been consumed.
 *
 * out    - packed destination words (read-modify-write at both edges)
 * offset - index of the first item to write
 * in     - source values, one per uint32_t
 * count  - number of items to pack; 0 is a no-op
 */
void __PackedArray_pack_26(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch would consume an input before testing
     `in == end`, run past `end` and overwrite the destination words. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)26) / 32;
  startBit = ((uint64_t)offset * (uint64_t)26) % 32;
  /* keep the bits that precede the first item in the first word */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of passes through the unrolled loop (the first one may be
       entered part-way); count becomes the number of tail items left. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          packed |= *in++ << ((0 * 26) % 32);
        case 1:
          packed |= *in << ((1 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 26) % 32));
        case 2:
          packed |= *in << ((2 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((2 * 26) % 32));
        case 3:
          packed |= *in << ((3 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((3 * 26) % 32));
        case 4:
          packed |= *in << ((4 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((4 * 26) % 32));
        case 5:
          packed |= *in++ << ((5 * 26) % 32);
        case 6:
          packed |= *in << ((6 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 26) % 32));
        case 7:
          packed |= *in << ((7 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((7 * 26) % 32));
        case 8:
          packed |= *in << ((8 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 26) % 32));
        case 9:
          packed |= *in << ((9 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((9 * 26) % 32));
        case 10:
          packed |= *in++ << ((10 * 26) % 32);
        case 11:
          packed |= *in << ((11 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((11 * 26) % 32));
        case 12:
          packed |= *in << ((12 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 26) % 32));
        case 13:
          packed |= *in << ((13 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((13 * 26) % 32));
        case 14:
          packed |= *in << ((14 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((14 * 26) % 32));
        case 15:
          /* item 15 ends exactly on a word boundary */
          packed |= *in++ << ((15 * 26) % 32);
          *out++ = packed;
          packed = 0;
        case 16:
          packed |= *in++ << ((16 * 26) % 32);
        case 17:
          packed |= *in << ((17 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((17 * 26) % 32));
        case 18:
          packed |= *in << ((18 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((18 * 26) % 32));
        case 19:
          packed |= *in << ((19 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 26) % 32));
        case 20:
          packed |= *in << ((20 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((20 * 26) % 32));
        case 21:
          packed |= *in++ << ((21 * 26) % 32);
        case 22:
          packed |= *in << ((22 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((22 * 26) % 32));
        case 23:
          packed |= *in << ((23 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 26) % 32));
        case 24:
          packed |= *in << ((24 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((24 * 26) % 32));
        case 25:
          packed |= *in << ((25 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 26) % 32));
        case 26:
          packed |= *in++ << ((26 * 26) % 32);
        case 27:
          packed |= *in << ((27 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((27 * 26) % 32));
        case 28:
          packed |= *in << ((28 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((28 * 26) % 32));
        case 29:
          packed |= *in << ((29 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((29 * 26) % 32));
        case 30:
          packed |= *in << ((30 * 26) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 26) % 32));
        case 31:
          /* item 31 ends exactly on a word boundary */
          packed |= *in++ << ((31 * 26) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* tail items start at bit 0 of a fresh word */
    offset = 0;
    startBit = 0;
  }
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 26) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 26) % 32));
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 26) % 32));
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 26) % 32));
      if (in == end) break;
    case 4:
      packed |= *in << ((4 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((4 * 26) % 32));
      if (in == end) break;
    case 5:
      packed |= *in++ << ((5 * 26) % 32);
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 26) % 32));
      if (in == end) break;
    case 7:
      packed |= *in << ((7 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((7 * 26) % 32));
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 26) % 32));
      if (in == end) break;
    case 9:
      packed |= *in << ((9 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((9 * 26) % 32));
      if (in == end) break;
    case 10:
      packed |= *in++ << ((10 * 26) % 32);
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 26) % 32));
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 26) % 32));
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 26) % 32));
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 26) % 32));
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 26) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 26) % 32);
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 26) % 32));
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 26) % 32));
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 26) % 32));
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 26) % 32));
      if (in == end) break;
    case 21:
      packed |= *in++ << ((21 * 26) % 32);
      if (in == end) break;
    case 22:
      packed |= *in << ((22 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((22 * 26) % 32));
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 26) % 32));
      if (in == end) break;
    case 24:
      packed |= *in << ((24 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((24 * 26) % 32));
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 26) % 32));
      if (in == end) break;
    case 26:
      packed |= *in++ << ((26 * 26) % 32);
      if (in == end) break;
    case 27:
      packed |= *in << ((27 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((27 * 26) % 32));
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 26) % 32));
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 26) % 32));
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 26) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 26) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 26) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  /* sanity check: exactly `count` tail inputs were consumed */
  assert(in == end);
  /* The last item stopped inside a word: merge the pending low bits with the
     bits already stored above the end of the last item, then write it back. */
  if ((count * 26 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)26 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
void __PackedArray_unpack_26(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t packed; | |
const uint32_t* __restrict end; | |
in += ((uint64_t)offset * (uint64_t)26) / 32; | |
packed = *in; | |
offset = offset % 32; | |
if (count >= 32 - offset) | |
{ | |
int32_t n; | |
n = (count + offset) / 32; | |
count -= 32 * n - offset; | |
switch (offset) | |
{ | |
do | |
{ | |
packed = *++in; | |
case 0: | |
*out++ = (packed >> ((0 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 1: | |
{ | |
uint32_t low, high; | |
low = packed >> ((1 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((1 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((1 * 26) % 32)) << (32 - ((1 * 26) % 32)))); | |
} | |
case 2: | |
{ | |
uint32_t low, high; | |
low = packed >> ((2 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((2 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((2 * 26) % 32)) << (32 - ((2 * 26) % 32)))); | |
} | |
case 3: | |
{ | |
uint32_t low, high; | |
low = packed >> ((3 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((3 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((3 * 26) % 32)) << (32 - ((3 * 26) % 32)))); | |
} | |
case 4: | |
{ | |
uint32_t low, high; | |
low = packed >> ((4 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((4 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((4 * 26) % 32)) << (32 - ((4 * 26) % 32)))); | |
} | |
case 5: | |
*out++ = (packed >> ((5 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 6: | |
{ | |
uint32_t low, high; | |
low = packed >> ((6 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((6 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((6 * 26) % 32)) << (32 - ((6 * 26) % 32)))); | |
} | |
case 7: | |
{ | |
uint32_t low, high; | |
low = packed >> ((7 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((7 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((7 * 26) % 32)) << (32 - ((7 * 26) % 32)))); | |
} | |
case 8: | |
{ | |
uint32_t low, high; | |
low = packed >> ((8 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((8 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((8 * 26) % 32)) << (32 - ((8 * 26) % 32)))); | |
} | |
case 9: | |
{ | |
uint32_t low, high; | |
low = packed >> ((9 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((9 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((9 * 26) % 32)) << (32 - ((9 * 26) % 32)))); | |
} | |
case 10: | |
*out++ = (packed >> ((10 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 11: | |
{ | |
uint32_t low, high; | |
low = packed >> ((11 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((11 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((11 * 26) % 32)) << (32 - ((11 * 26) % 32)))); | |
} | |
case 12: | |
{ | |
uint32_t low, high; | |
low = packed >> ((12 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((12 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((12 * 26) % 32)) << (32 - ((12 * 26) % 32)))); | |
} | |
case 13: | |
{ | |
uint32_t low, high; | |
low = packed >> ((13 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((13 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((13 * 26) % 32)) << (32 - ((13 * 26) % 32)))); | |
} | |
case 14: | |
{ | |
uint32_t low, high; | |
low = packed >> ((14 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((14 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((14 * 26) % 32)) << (32 - ((14 * 26) % 32)))); | |
} | |
case 15: | |
*out++ = (packed >> ((15 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
packed = *++in; | |
case 16: | |
*out++ = (packed >> ((16 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 17: | |
{ | |
uint32_t low, high; | |
low = packed >> ((17 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((17 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((17 * 26) % 32)) << (32 - ((17 * 26) % 32)))); | |
} | |
case 18: | |
{ | |
uint32_t low, high; | |
low = packed >> ((18 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((18 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((18 * 26) % 32)) << (32 - ((18 * 26) % 32)))); | |
} | |
case 19: | |
{ | |
uint32_t low, high; | |
low = packed >> ((19 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((19 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((19 * 26) % 32)) << (32 - ((19 * 26) % 32)))); | |
} | |
case 20: | |
{ | |
uint32_t low, high; | |
low = packed >> ((20 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((20 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((20 * 26) % 32)) << (32 - ((20 * 26) % 32)))); | |
} | |
case 21: | |
*out++ = (packed >> ((21 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 22: | |
{ | |
uint32_t low, high; | |
low = packed >> ((22 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((22 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((22 * 26) % 32)) << (32 - ((22 * 26) % 32)))); | |
} | |
case 23: | |
{ | |
uint32_t low, high; | |
low = packed >> ((23 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((23 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((23 * 26) % 32)) << (32 - ((23 * 26) % 32)))); | |
} | |
case 24: | |
{ | |
uint32_t low, high; | |
low = packed >> ((24 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((24 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((24 * 26) % 32)) << (32 - ((24 * 26) % 32)))); | |
} | |
case 25: | |
{ | |
uint32_t low, high; | |
low = packed >> ((25 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((25 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((25 * 26) % 32)) << (32 - ((25 * 26) % 32)))); | |
} | |
case 26: | |
*out++ = (packed >> ((26 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
case 27: | |
{ | |
uint32_t low, high; | |
low = packed >> ((27 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((27 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((27 * 26) % 32)) << (32 - ((27 * 26) % 32)))); | |
} | |
case 28: | |
{ | |
uint32_t low, high; | |
low = packed >> ((28 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((28 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((28 * 26) % 32)) << (32 - ((28 * 26) % 32)))); | |
} | |
case 29: | |
{ | |
uint32_t low, high; | |
low = packed >> ((29 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((29 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((29 * 26) % 32)) << (32 - ((29 * 26) % 32)))); | |
} | |
case 30: | |
{ | |
uint32_t low, high; | |
low = packed >> ((30 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((30 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((30 * 26) % 32)) << (32 - ((30 * 26) % 32)))); | |
} | |
case 31: | |
*out++ = (packed >> ((31 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
} while (--n > 0); | |
} | |
if (count == 0) | |
return; | |
packed = *++in; | |
offset = 0; | |
} | |
end = out + count; | |
switch (offset) | |
{ | |
case 0: | |
*out++ = (packed >> ((0 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 1: | |
{ | |
uint32_t low, high; | |
low = packed >> ((1 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((1 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((1 * 26) % 32)) << (32 - ((1 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 2: | |
{ | |
uint32_t low, high; | |
low = packed >> ((2 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((2 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((2 * 26) % 32)) << (32 - ((2 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 3: | |
{ | |
uint32_t low, high; | |
low = packed >> ((3 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((3 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((3 * 26) % 32)) << (32 - ((3 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 4: | |
{ | |
uint32_t low, high; | |
low = packed >> ((4 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((4 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((4 * 26) % 32)) << (32 - ((4 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 5: | |
*out++ = (packed >> ((5 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 6: | |
{ | |
uint32_t low, high; | |
low = packed >> ((6 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((6 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((6 * 26) % 32)) << (32 - ((6 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 7: | |
{ | |
uint32_t low, high; | |
low = packed >> ((7 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((7 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((7 * 26) % 32)) << (32 - ((7 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 8: | |
{ | |
uint32_t low, high; | |
low = packed >> ((8 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((8 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((8 * 26) % 32)) << (32 - ((8 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 9: | |
{ | |
uint32_t low, high; | |
low = packed >> ((9 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((9 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((9 * 26) % 32)) << (32 - ((9 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 10: | |
*out++ = (packed >> ((10 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 11: | |
{ | |
uint32_t low, high; | |
low = packed >> ((11 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((11 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((11 * 26) % 32)) << (32 - ((11 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 12: | |
{ | |
uint32_t low, high; | |
low = packed >> ((12 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((12 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((12 * 26) % 32)) << (32 - ((12 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 13: | |
{ | |
uint32_t low, high; | |
low = packed >> ((13 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((13 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((13 * 26) % 32)) << (32 - ((13 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 14: | |
{ | |
uint32_t low, high; | |
low = packed >> ((14 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((14 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((14 * 26) % 32)) << (32 - ((14 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 15: | |
*out++ = (packed >> ((15 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
packed = *++in; | |
case 16: | |
*out++ = (packed >> ((16 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 17: | |
{ | |
uint32_t low, high; | |
low = packed >> ((17 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((17 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((17 * 26) % 32)) << (32 - ((17 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 18: | |
{ | |
uint32_t low, high; | |
low = packed >> ((18 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((18 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((18 * 26) % 32)) << (32 - ((18 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 19: | |
{ | |
uint32_t low, high; | |
low = packed >> ((19 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((19 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((19 * 26) % 32)) << (32 - ((19 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 20: | |
{ | |
uint32_t low, high; | |
low = packed >> ((20 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((20 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((20 * 26) % 32)) << (32 - ((20 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 21: | |
*out++ = (packed >> ((21 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 22: | |
{ | |
uint32_t low, high; | |
low = packed >> ((22 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((22 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((22 * 26) % 32)) << (32 - ((22 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 23: | |
{ | |
uint32_t low, high; | |
low = packed >> ((23 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((23 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((23 * 26) % 32)) << (32 - ((23 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 24: | |
{ | |
uint32_t low, high; | |
low = packed >> ((24 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((24 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((24 * 26) % 32)) << (32 - ((24 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 25: | |
{ | |
uint32_t low, high; | |
low = packed >> ((25 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((25 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((25 * 26) % 32)) << (32 - ((25 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 26: | |
*out++ = (packed >> ((26 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
case 27: | |
{ | |
uint32_t low, high; | |
low = packed >> ((27 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((27 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((27 * 26) % 32)) << (32 - ((27 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 28: | |
{ | |
uint32_t low, high; | |
low = packed >> ((28 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((28 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((28 * 26) % 32)) << (32 - ((28 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 29: | |
{ | |
uint32_t low, high; | |
low = packed >> ((29 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((29 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((29 * 26) % 32)) << (32 - ((29 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 30: | |
{ | |
uint32_t low, high; | |
low = packed >> ((30 * 26) % 32); | |
packed = *++in; | |
high = packed << (32 - ((30 * 26) % 32)); | |
*out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 26) - 1) >> (32 - ((30 * 26) % 32)) << (32 - ((30 * 26) % 32)))); | |
} | |
if (out == end) break; | |
case 31: | |
*out++ = (packed >> ((31 * 26) % 32)) & (uint32_t)((1ULL << 26) - 1); | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
/*
 * 27-bit packing, unrolled over groups of 32 items (32 items * 27 bits fill
 * exactly 27 32-bit words). The step macros below are the only three distinct
 * case bodies; they work on the locals `in`, `out` and `packed` of
 * __PackedArray_pack_27 and are undefined again right after the function.
 */

/* Bit position, inside its 32-bit word, at which item `i` of a group starts. */
#define PACK27_SHIFT(i) (((i) * 27) % 32)

/* Item fits in the current word without completing it: accumulate only. */
#define PACK27_INSIDE(i) \
  do { \
    packed |= *in++ << PACK27_SHIFT(i); \
  } while (0)

/* Item crosses a word boundary: store the completed word, then start the
 * next word with the bits of the item that did not fit. */
#define PACK27_STRADDLE(i) \
  do { \
    packed |= *in << PACK27_SHIFT(i); \
    *out++ = packed; \
    packed = *in++ >> (32 - PACK27_SHIFT(i)); \
  } while (0)

/* Item ends exactly on the word boundary: store the word, restart empty. */
#define PACK27_LAST(i) \
  do { \
    packed |= *in++ << PACK27_SHIFT(i); \
    *out++ = packed; \
    packed = 0; \
  } while (0)

/*
 * Stores `count` items read from `in`, 27 bits each, into the packed buffer
 * `out`, starting at item index `offset`.
 *
 * Bits of the first and last touched words that lie outside the written range
 * are kept as they were. Input values are OR-ed in without masking, so any bit
 * set above bit 26 spills into the following item.
 * A `count` of 0 leaves both buffers untouched.
 */
void __PackedArray_pack_27(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below consumes one item before its first `in == end`
   * test, so an empty request must be rejected up front: otherwise it would
   * read `in` and overwrite `out` although nothing was asked for. */
  if (count == 0)
    return;

  /* Locate the first destination word and keep the bits already stored
   * below the starting bit position. */
  out += ((uint64_t)offset * (uint64_t)27) / 32;
  startBit = ((uint64_t)offset * (uint64_t)27) % 32;
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current group of 32. The switch
     * jumps into the unrolled group at the item's index within the group,
     * every case falls through to the next one, and the do/while repeats
     * whole groups. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:  PACK27_INSIDE(0);
        case 1:  PACK27_STRADDLE(1);
        case 2:  PACK27_STRADDLE(2);
        case 3:  PACK27_STRADDLE(3);
        case 4:  PACK27_STRADDLE(4);
        case 5:  PACK27_STRADDLE(5);
        case 6:  PACK27_INSIDE(6);
        case 7:  PACK27_STRADDLE(7);
        case 8:  PACK27_STRADDLE(8);
        case 9:  PACK27_STRADDLE(9);
        case 10: PACK27_STRADDLE(10);
        case 11: PACK27_STRADDLE(11);
        case 12: PACK27_INSIDE(12);
        case 13: PACK27_STRADDLE(13);
        case 14: PACK27_STRADDLE(14);
        case 15: PACK27_STRADDLE(15);
        case 16: PACK27_STRADDLE(16);
        case 17: PACK27_STRADDLE(17);
        case 18: PACK27_STRADDLE(18);
        case 19: PACK27_INSIDE(19);
        case 20: PACK27_STRADDLE(20);
        case 21: PACK27_STRADDLE(21);
        case 22: PACK27_STRADDLE(22);
        case 23: PACK27_STRADDLE(23);
        case 24: PACK27_STRADDLE(24);
        case 25: PACK27_INSIDE(25);
        case 26: PACK27_STRADDLE(26);
        case 27: PACK27_STRADDLE(27);
        case 28: PACK27_STRADDLE(28);
        case 29: PACK27_STRADDLE(29);
        case 30: PACK27_STRADDLE(30);
        case 31: PACK27_LAST(31);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The remaining items start a fresh group on a word boundary. */
    offset = 0;
    startBit = 0;
  }

  /* Tail: fewer items than what is left of the group. Same unrolled steps,
   * but stop as soon as the last input item has been consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:  PACK27_INSIDE(0);    if (in == end) break;
    case 1:  PACK27_STRADDLE(1);  if (in == end) break;
    case 2:  PACK27_STRADDLE(2);  if (in == end) break;
    case 3:  PACK27_STRADDLE(3);  if (in == end) break;
    case 4:  PACK27_STRADDLE(4);  if (in == end) break;
    case 5:  PACK27_STRADDLE(5);  if (in == end) break;
    case 6:  PACK27_INSIDE(6);    if (in == end) break;
    case 7:  PACK27_STRADDLE(7);  if (in == end) break;
    case 8:  PACK27_STRADDLE(8);  if (in == end) break;
    case 9:  PACK27_STRADDLE(9);  if (in == end) break;
    case 10: PACK27_STRADDLE(10); if (in == end) break;
    case 11: PACK27_STRADDLE(11); if (in == end) break;
    case 12: PACK27_INSIDE(12);   if (in == end) break;
    case 13: PACK27_STRADDLE(13); if (in == end) break;
    case 14: PACK27_STRADDLE(14); if (in == end) break;
    case 15: PACK27_STRADDLE(15); if (in == end) break;
    case 16: PACK27_STRADDLE(16); if (in == end) break;
    case 17: PACK27_STRADDLE(17); if (in == end) break;
    case 18: PACK27_STRADDLE(18); if (in == end) break;
    case 19: PACK27_INSIDE(19);   if (in == end) break;
    case 20: PACK27_STRADDLE(20); if (in == end) break;
    case 21: PACK27_STRADDLE(21); if (in == end) break;
    case 22: PACK27_STRADDLE(22); if (in == end) break;
    case 23: PACK27_STRADDLE(23); if (in == end) break;
    case 24: PACK27_STRADDLE(24); if (in == end) break;
    case 25: PACK27_INSIDE(25);   if (in == end) break;
    case 26: PACK27_STRADDLE(26); if (in == end) break;
    case 27: PACK27_STRADDLE(27); if (in == end) break;
    case 28: PACK27_STRADDLE(28); if (in == end) break;
    case 29: PACK27_STRADDLE(29); if (in == end) break;
    case 30: PACK27_STRADDLE(30); if (in == end) break;
    case 31: PACK27_LAST(31);     if (in == end) break;
  }
  assert(in == end);

  /* If the last item ends in the middle of a word, merge the pending bits
   * with the bits already stored above them in the destination word. */
  if ((count * 27 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)27 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}

#undef PACK27_LAST
#undef PACK27_STRADDLE
#undef PACK27_INSIDE
#undef PACK27_SHIFT
/*
 * 27-bit unpacking, unrolled over groups of 32 items (32 items * 27 bits span
 * exactly 27 32-bit words). The step macros below are the only two distinct
 * case bodies; they work on the locals `in`, `out` and `packed` of
 * __PackedArray_unpack_27 and are undefined again right after the function.
 */

/* Bit position, inside its 32-bit word, at which item `i` of a group starts. */
#define UNPACK27_SHIFT(i) (((i) * 27) % 32)

/* Mask selecting the low 27 bits of a word. */
#define UNPACK27_MASK ((uint32_t)((1ULL << 27) - 1))

/* Item is entirely contained in the current word: shift and mask. */
#define UNPACK27_INSIDE(i) \
  do { \
    *out++ = (packed >> UNPACK27_SHIFT(i)) & UNPACK27_MASK; \
  } while (0)

/* Item crosses a word boundary: its low bits come from the current word and
 * its high bits from the next one, which becomes the current word. The XOR
 * form selects `high` where the mask is set and `low` elsewhere. */
#define UNPACK27_STRADDLE(i) \
  do { \
    uint32_t low, high; \
    low = packed >> UNPACK27_SHIFT(i); \
    packed = *++in; \
    high = packed << (32 - UNPACK27_SHIFT(i)); \
    *out++ = low ^ ((low ^ high) & (UNPACK27_MASK >> (32 - UNPACK27_SHIFT(i)) << (32 - UNPACK27_SHIFT(i)))); \
  } while (0)

/*
 * Extracts `count` items of 27 bits each from the packed buffer `in`, starting
 * at item index `offset`, and writes them to `out` as one uint32_t per item.
 *
 * A `count` of 0 leaves `out` untouched and reads nothing from `in`.
 */
void __PackedArray_unpack_27(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* The tail switch below emits one item before its first `out == end` test,
   * so an empty request must be rejected up front: otherwise it would write
   * past `out` and read past `in` although nothing was asked for. */
  if (count == 0)
    return;

  /* Locate and load the word holding the first bit of the first item. */
  in += ((uint64_t)offset * (uint64_t)27) / 32;
  packed = *in;
  offset = offset % 32;

  if (count >= 32 - offset)
  {
    /* Enough items to reach the end of the current group of 32. The switch
     * jumps into the unrolled group at the item's index within the group,
     * every case falls through to the next one, and the do/while repeats
     * whole groups, loading the first word of each new group at its top. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        packed = *++in;
        case 0:  UNPACK27_INSIDE(0);
        case 1:  UNPACK27_STRADDLE(1);
        case 2:  UNPACK27_STRADDLE(2);
        case 3:  UNPACK27_STRADDLE(3);
        case 4:  UNPACK27_STRADDLE(4);
        case 5:  UNPACK27_STRADDLE(5);
        case 6:  UNPACK27_INSIDE(6);
        case 7:  UNPACK27_STRADDLE(7);
        case 8:  UNPACK27_STRADDLE(8);
        case 9:  UNPACK27_STRADDLE(9);
        case 10: UNPACK27_STRADDLE(10);
        case 11: UNPACK27_STRADDLE(11);
        case 12: UNPACK27_INSIDE(12);
        case 13: UNPACK27_STRADDLE(13);
        case 14: UNPACK27_STRADDLE(14);
        case 15: UNPACK27_STRADDLE(15);
        case 16: UNPACK27_STRADDLE(16);
        case 17: UNPACK27_STRADDLE(17);
        case 18: UNPACK27_STRADDLE(18);
        case 19: UNPACK27_INSIDE(19);
        case 20: UNPACK27_STRADDLE(20);
        case 21: UNPACK27_STRADDLE(21);
        case 22: UNPACK27_STRADDLE(22);
        case 23: UNPACK27_STRADDLE(23);
        case 24: UNPACK27_STRADDLE(24);
        case 25: UNPACK27_INSIDE(25);
        case 26: UNPACK27_STRADDLE(26);
        case 27: UNPACK27_STRADDLE(27);
        case 28: UNPACK27_STRADDLE(28);
        case 29: UNPACK27_STRADDLE(29);
        case 30: UNPACK27_STRADDLE(30);
        case 31: UNPACK27_INSIDE(31);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The remaining items start a fresh group in the next word. */
    packed = *++in;
    offset = 0;
  }

  /* Tail: fewer items than what is left of the group. Same unrolled steps,
   * but stop as soon as the last output item has been written. */
  end = out + count;
  switch (offset)
  {
    case 0:  UNPACK27_INSIDE(0);    if (out == end) break;
    case 1:  UNPACK27_STRADDLE(1);  if (out == end) break;
    case 2:  UNPACK27_STRADDLE(2);  if (out == end) break;
    case 3:  UNPACK27_STRADDLE(3);  if (out == end) break;
    case 4:  UNPACK27_STRADDLE(4);  if (out == end) break;
    case 5:  UNPACK27_STRADDLE(5);  if (out == end) break;
    case 6:  UNPACK27_INSIDE(6);    if (out == end) break;
    case 7:  UNPACK27_STRADDLE(7);  if (out == end) break;
    case 8:  UNPACK27_STRADDLE(8);  if (out == end) break;
    case 9:  UNPACK27_STRADDLE(9);  if (out == end) break;
    case 10: UNPACK27_STRADDLE(10); if (out == end) break;
    case 11: UNPACK27_STRADDLE(11); if (out == end) break;
    case 12: UNPACK27_INSIDE(12);   if (out == end) break;
    case 13: UNPACK27_STRADDLE(13); if (out == end) break;
    case 14: UNPACK27_STRADDLE(14); if (out == end) break;
    case 15: UNPACK27_STRADDLE(15); if (out == end) break;
    case 16: UNPACK27_STRADDLE(16); if (out == end) break;
    case 17: UNPACK27_STRADDLE(17); if (out == end) break;
    case 18: UNPACK27_STRADDLE(18); if (out == end) break;
    case 19: UNPACK27_INSIDE(19);   if (out == end) break;
    case 20: UNPACK27_STRADDLE(20); if (out == end) break;
    case 21: UNPACK27_STRADDLE(21); if (out == end) break;
    case 22: UNPACK27_STRADDLE(22); if (out == end) break;
    case 23: UNPACK27_STRADDLE(23); if (out == end) break;
    case 24: UNPACK27_STRADDLE(24); if (out == end) break;
    case 25: UNPACK27_INSIDE(25);   if (out == end) break;
    case 26: UNPACK27_STRADDLE(26); if (out == end) break;
    case 27: UNPACK27_STRADDLE(27); if (out == end) break;
    case 28: UNPACK27_STRADDLE(28); if (out == end) break;
    case 29: UNPACK27_STRADDLE(29); if (out == end) break;
    case 30: UNPACK27_STRADDLE(30); if (out == end) break;
    case 31: UNPACK27_INSIDE(31);   if (out == end) break;
  }
  assert(out == end);
}

#undef UNPACK27_STRADDLE
#undef UNPACK27_INSIDE
#undef UNPACK27_MASK
#undef UNPACK27_SHIFT
/*
 * Packs `count` values from `in` into the bit stream at `out`, 28 bits per
 * value, starting at element index `offset` (element i occupies bits
 * [i * 28, i * 28 + 28) of the stream, least significant bit first).
 *
 * out    - first word of the packed stream; words are read-modified-written
 *          so that bits belonging to elements before `offset` and after
 *          `offset + count` keep their current value.
 * offset - index of the first element to overwrite.
 * in     - source values; they are NOT masked, so a value wider than 28 bits
 *          spills into the neighbouring elements.
 * count  - number of values to pack; 0 is a no-op that touches no memory.
 */
void __PackedArray_pack_28(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bitsPerItem = 28u;
  const uint32_t bitsPerWord = 32u;
  uint32_t shift;
  uint32_t packed;

  /* Nothing to write: return before dereferencing either buffer. */
  if (count == 0)
  {
    return;
  }

  /* 64-bit product so that large element indices cannot wrap. */
  out += ((uint64_t)offset * (uint64_t)bitsPerItem) / bitsPerWord;
  shift = (uint32_t)(((uint64_t)offset * (uint64_t)bitsPerItem) % bitsPerWord);

  /* Keep the bits of the first word that precede the first element. */
  packed = *out & (uint32_t)((1ULL << shift) - 1);

  do
  {
    const uint32_t value = *in++;
    const uint32_t next = shift + bitsPerItem;

    packed |= value << shift;
    if (next >= bitsPerWord)
    {
      /* The current word is complete: flush it. */
      *out++ = packed;
      /* A straddling element carries its upper bits into the next word;
         an element ending exactly on the boundary carries nothing. */
      packed = (next > bitsPerWord) ? (value >> (bitsPerWord - shift)) : 0;
    }
    shift = next % bitsPerWord;
  } while (--count != 0);

  /* Partially filled last word: merge with the bits already stored above
     the end of the last element. */
  if (shift != 0)
  {
    *out = packed | (*out & ~(uint32_t)((1ULL << shift) - 1));
  }
}
/*
 * Extracts `count` 28-bit values from the packed bit stream at `in`, starting
 * at element index `offset`, and stores each one zero-extended in `out`
 * (element i occupies bits [i * 28, i * 28 + 28) of the stream, least
 * significant bit first).
 *
 * in     - first word of the packed stream.
 * offset - index of the first element to extract.
 * out    - destination for the `count` unpacked values.
 * count  - number of values to extract; 0 is a no-op that touches no memory.
 *
 * A new input word is only loaded when a requested element actually needs it,
 * so no word beyond the last requested element is read.
 */
void __PackedArray_unpack_28(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t bitsPerItem = 28u;
  const uint32_t bitsPerWord = 32u;
  const uint32_t itemMask = (uint32_t)((1ULL << 28) - 1);
  uint32_t shift;
  uint32_t packed;

  /* Nothing to read: return before dereferencing either buffer. */
  if (count == 0)
  {
    return;
  }

  /* 64-bit product so that large element indices cannot wrap. */
  in += ((uint64_t)offset * (uint64_t)bitsPerItem) / bitsPerWord;
  shift = (uint32_t)(((uint64_t)offset * (uint64_t)bitsPerItem) % bitsPerWord);
  packed = *in;

  for (;;)
  {
    const uint32_t next = shift + bitsPerItem;
    uint32_t value = packed >> shift;

    if (next > bitsPerWord)
    {
      /* The element straddles two words: its upper bits are the low bits
         of the following word. */
      packed = *++in;
      value |= packed << (bitsPerWord - shift);
    }
    *out++ = value & itemMask;

    if (--count == 0)
    {
      break;
    }

    /* The element ended exactly on a word boundary and more elements
       follow: move on to the next word. */
    if (next == bitsPerWord)
    {
      packed = *++in;
    }
    shift = next % bitsPerWord;
  }
}
/*
 * Packs `count` values from `in` into the bit stream at `out`, 29 bits per
 * value, starting at element index `offset` (element i occupies bits
 * [i * 29, i * 29 + 29) of the stream, least significant bit first).
 *
 * out    - first word of the packed stream; words are read-modified-written
 *          so that bits belonging to elements before `offset` and after
 *          `offset + count` keep their current value.
 * offset - index of the first element to overwrite.
 * in     - source values; they are NOT masked, so a value wider than 29 bits
 *          spills into the neighbouring elements.
 * count  - number of values to pack; 0 is a no-op that touches no memory.
 */
void __PackedArray_pack_29(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t bitsPerItem = 29u;
  const uint32_t bitsPerWord = 32u;
  uint32_t shift;
  uint32_t packed;

  /* Nothing to write: return before dereferencing either buffer. */
  if (count == 0)
  {
    return;
  }

  /* 64-bit product so that large element indices cannot wrap. */
  out += ((uint64_t)offset * (uint64_t)bitsPerItem) / bitsPerWord;
  shift = (uint32_t)(((uint64_t)offset * (uint64_t)bitsPerItem) % bitsPerWord);

  /* Keep the bits of the first word that precede the first element. */
  packed = *out & (uint32_t)((1ULL << shift) - 1);

  do
  {
    const uint32_t value = *in++;
    const uint32_t next = shift + bitsPerItem;

    packed |= value << shift;
    if (next >= bitsPerWord)
    {
      /* The current word is complete: flush it. */
      *out++ = packed;
      /* A straddling element carries its upper bits into the next word;
         an element ending exactly on the boundary carries nothing. */
      packed = (next > bitsPerWord) ? (value >> (bitsPerWord - shift)) : 0;
    }
    shift = next % bitsPerWord;
  } while (--count != 0);

  /* Partially filled last word: merge with the bits already stored above
     the end of the last element. */
  if (shift != 0)
  {
    *out = packed | (*out & ~(uint32_t)((1ULL << shift) - 1));
  }
}
/*
 * Unpacks `count` 29-bit items from the packed word stream `in` into `out`,
 * one item per uint32_t, starting at item index `offset`.
 *
 * Layout read by this function: item i starts at bit 29 * i of the stream;
 * within a 32-bit word bit 0 is the least significant bit, and an item that
 * does not fit in the remainder of a word continues in the low bits of the
 * next word.
 *
 * 32 items occupy exactly 29 words, so the bit position of an item inside its
 * word depends only on (item index % 32).  That is why both switches dispatch
 * on `offset % 32` and every `case k` uses the constant shift (k * 29) % 32.
 *
 * Structure:
 *   1. If the request reaches the end of the current group of 32 items, the
 *      first switch jumps into the middle of a fully unrolled do/while loop
 *      (Duff's-device style) and processes whole groups without any per-item
 *      end test.
 *   2. The remaining (< 32) items are handled by the second switch, which
 *      tests for the end after every item.
 * All case labels fall through on purpose.
 *
 * in     - packed source words (never written).
 * offset - index of the first item to unpack.
 * out    - destination, must have room for `count` items.
 * count  - number of items to unpack; 0 is a no-op.
 */
void __PackedArray_unpack_29(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  /* Low 29 bits set: the value mask of one item. */
  const uint32_t mask = (uint32_t)((1ULL << 29) - 1);
  uint32_t packed;
  const uint32_t* __restrict end;

  /*
   * Nothing to do.  Without this guard the tail switch below would write an
   * item before comparing `out` against `end`, never see them equal, and run
   * through every remaining case.
   */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)29) / 32;
  packed = *in;
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of 32-item group boundaries reached; count becomes the tail length. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        /* Start of a new group of 32 items: it begins on a fresh word. */
        packed = *++in;
        case 0:
          /* Item lies entirely inside the current word. */
          *out++ = (packed >> ((0 * 29) % 32)) & mask;
        case 1:
        {
          /*
           * Item straddles two words: `low` holds the bits left in the current
           * word, `high` the bits taken from the next one; the masked xor
           * selects `high` for the upper bit range and `low` elsewhere.
           */
          uint32_t low, high;
          low = packed >> ((1 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((1 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((1 * 29) % 32)) << (32 - ((1 * 29) % 32))));
        }
        case 2:
        {
          uint32_t low, high;
          low = packed >> ((2 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((2 * 29) % 32)) << (32 - ((2 * 29) % 32))));
        }
        case 3:
        {
          uint32_t low, high;
          low = packed >> ((3 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((3 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((3 * 29) % 32)) << (32 - ((3 * 29) % 32))));
        }
        case 4:
        {
          uint32_t low, high;
          low = packed >> ((4 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((4 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((4 * 29) % 32)) << (32 - ((4 * 29) % 32))));
        }
        case 5:
        {
          uint32_t low, high;
          low = packed >> ((5 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((5 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((5 * 29) % 32)) << (32 - ((5 * 29) % 32))));
        }
        case 6:
        {
          uint32_t low, high;
          low = packed >> ((6 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((6 * 29) % 32)) << (32 - ((6 * 29) % 32))));
        }
        case 7:
        {
          uint32_t low, high;
          low = packed >> ((7 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((7 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((7 * 29) % 32)) << (32 - ((7 * 29) % 32))));
        }
        case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((8 * 29) % 32)) << (32 - ((8 * 29) % 32))));
        }
        case 9:
        {
          uint32_t low, high;
          low = packed >> ((9 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((9 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((9 * 29) % 32)) << (32 - ((9 * 29) % 32))));
        }
        case 10:
          /* Item lies entirely inside the current word. */
          *out++ = (packed >> ((10 * 29) % 32)) & mask;
        case 11:
        {
          uint32_t low, high;
          low = packed >> ((11 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((11 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((11 * 29) % 32)) << (32 - ((11 * 29) % 32))));
        }
        case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((12 * 29) % 32)) << (32 - ((12 * 29) % 32))));
        }
        case 13:
        {
          uint32_t low, high;
          low = packed >> ((13 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((13 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((13 * 29) % 32)) << (32 - ((13 * 29) % 32))));
        }
        case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((14 * 29) % 32)) << (32 - ((14 * 29) % 32))));
        }
        case 15:
        {
          uint32_t low, high;
          low = packed >> ((15 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((15 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((15 * 29) % 32)) << (32 - ((15 * 29) % 32))));
        }
        case 16:
        {
          uint32_t low, high;
          low = packed >> ((16 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((16 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((16 * 29) % 32)) << (32 - ((16 * 29) % 32))));
        }
        case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((17 * 29) % 32)) << (32 - ((17 * 29) % 32))));
        }
        case 18:
        {
          uint32_t low, high;
          low = packed >> ((18 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((18 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((18 * 29) % 32)) << (32 - ((18 * 29) % 32))));
        }
        case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((19 * 29) % 32)) << (32 - ((19 * 29) % 32))));
        }
        case 20:
        {
          uint32_t low, high;
          low = packed >> ((20 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((20 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((20 * 29) % 32)) << (32 - ((20 * 29) % 32))));
        }
        case 21:
          /* Item lies entirely inside the current word. */
          *out++ = (packed >> ((21 * 29) % 32)) & mask;
        case 22:
        {
          uint32_t low, high;
          low = packed >> ((22 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((22 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((22 * 29) % 32)) << (32 - ((22 * 29) % 32))));
        }
        case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((23 * 29) % 32)) << (32 - ((23 * 29) % 32))));
        }
        case 24:
        {
          uint32_t low, high;
          low = packed >> ((24 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((24 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((24 * 29) % 32)) << (32 - ((24 * 29) % 32))));
        }
        case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((25 * 29) % 32)) << (32 - ((25 * 29) % 32))));
        }
        case 26:
        {
          uint32_t low, high;
          low = packed >> ((26 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((26 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((26 * 29) % 32)) << (32 - ((26 * 29) % 32))));
        }
        case 27:
        {
          uint32_t low, high;
          low = packed >> ((27 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((27 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((27 * 29) % 32)) << (32 - ((27 * 29) % 32))));
        }
        case 28:
        {
          uint32_t low, high;
          low = packed >> ((28 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((28 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((28 * 29) % 32)) << (32 - ((28 * 29) % 32))));
        }
        case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((29 * 29) % 32)) << (32 - ((29 * 29) % 32))));
        }
        case 30:
        {
          uint32_t low, high;
          low = packed >> ((30 * 29) % 32);
          packed = *++in;
          high = packed << (32 - ((30 * 29) % 32));
          *out++ = low ^ ((low ^ high) & (mask >> (32 - ((30 * 29) % 32)) << (32 - ((30 * 29) % 32))));
        }
        case 31:
          /* Last item of the group: it ends exactly at the top of the current word. */
          *out++ = (packed >> ((31 * 29) % 32)) & mask;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts a new group, hence on a fresh word at position 0. */
    packed = *++in;
    offset = 0;
  }
  end = out + count;
  /* Tail: fewer than 32 items left; stop as soon as `count` items were written. */
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 29) % 32)) & mask;
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((1 * 29) % 32)) << (32 - ((1 * 29) % 32))));
    }
    if (out == end) break;
    case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((2 * 29) % 32)) << (32 - ((2 * 29) % 32))));
    }
    if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((3 * 29) % 32)) << (32 - ((3 * 29) % 32))));
    }
    if (out == end) break;
    case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((4 * 29) % 32)) << (32 - ((4 * 29) % 32))));
    }
    if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((5 * 29) % 32)) << (32 - ((5 * 29) % 32))));
    }
    if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((6 * 29) % 32)) << (32 - ((6 * 29) % 32))));
    }
    if (out == end) break;
    case 7:
    {
      uint32_t low, high;
      low = packed >> ((7 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((7 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((7 * 29) % 32)) << (32 - ((7 * 29) % 32))));
    }
    if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((8 * 29) % 32)) << (32 - ((8 * 29) % 32))));
    }
    if (out == end) break;
    case 9:
    {
      uint32_t low, high;
      low = packed >> ((9 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((9 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((9 * 29) % 32)) << (32 - ((9 * 29) % 32))));
    }
    if (out == end) break;
    case 10:
      *out++ = (packed >> ((10 * 29) % 32)) & mask;
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((11 * 29) % 32)) << (32 - ((11 * 29) % 32))));
    }
    if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((12 * 29) % 32)) << (32 - ((12 * 29) % 32))));
    }
    if (out == end) break;
    case 13:
    {
      uint32_t low, high;
      low = packed >> ((13 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((13 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((13 * 29) % 32)) << (32 - ((13 * 29) % 32))));
    }
    if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((14 * 29) % 32)) << (32 - ((14 * 29) % 32))));
    }
    if (out == end) break;
    case 15:
    {
      uint32_t low, high;
      low = packed >> ((15 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((15 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((15 * 29) % 32)) << (32 - ((15 * 29) % 32))));
    }
    if (out == end) break;
    case 16:
    {
      uint32_t low, high;
      low = packed >> ((16 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((16 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((16 * 29) % 32)) << (32 - ((16 * 29) % 32))));
    }
    if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((17 * 29) % 32)) << (32 - ((17 * 29) % 32))));
    }
    if (out == end) break;
    case 18:
    {
      uint32_t low, high;
      low = packed >> ((18 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((18 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((18 * 29) % 32)) << (32 - ((18 * 29) % 32))));
    }
    if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((19 * 29) % 32)) << (32 - ((19 * 29) % 32))));
    }
    if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((20 * 29) % 32)) << (32 - ((20 * 29) % 32))));
    }
    if (out == end) break;
    case 21:
      *out++ = (packed >> ((21 * 29) % 32)) & mask;
      if (out == end) break;
    case 22:
    {
      uint32_t low, high;
      low = packed >> ((22 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((22 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((22 * 29) % 32)) << (32 - ((22 * 29) % 32))));
    }
    if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((23 * 29) % 32)) << (32 - ((23 * 29) % 32))));
    }
    if (out == end) break;
    case 24:
    {
      uint32_t low, high;
      low = packed >> ((24 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((24 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((24 * 29) % 32)) << (32 - ((24 * 29) % 32))));
    }
    if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((25 * 29) % 32)) << (32 - ((25 * 29) % 32))));
    }
    if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((26 * 29) % 32)) << (32 - ((26 * 29) % 32))));
    }
    if (out == end) break;
    case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((27 * 29) % 32)) << (32 - ((27 * 29) % 32))));
    }
    if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((28 * 29) % 32)) << (32 - ((28 * 29) % 32))));
    }
    if (out == end) break;
    case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((29 * 29) % 32)) << (32 - ((29 * 29) % 32))));
    }
    if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 29) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 29) % 32));
      *out++ = low ^ ((low ^ high) & (mask >> (32 - ((30 * 29) % 32)) << (32 - ((30 * 29) % 32))));
    }
    if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 29) % 32)) & mask;
      if (out == end) break;
  }
  assert(out == end);
}
/*
 * Packs `count` items from `in` (one item per uint32_t) into the word stream
 * `out` as consecutive 30-bit fields, starting at item index `offset`.
 *
 * Layout written by this function: item i starts at bit 30 * i of the stream;
 * within a 32-bit word bit 0 is the least significant bit, and an item that
 * does not fit in the remainder of a word continues in the low bits of the
 * next word.  32 items occupy exactly 30 words, so the shift used for an item
 * depends only on (item index % 32); both switches dispatch on `offset % 32`.
 *
 * Bits of the first and last touched word that lie outside the written range
 * are preserved: the bits below `startBit` are loaded from `*out` up front and
 * the bits above the last item are merged back in at the end.
 *
 * Input values are OR-ed in without masking.
 * NOTE(review): this presumes every input value fits in 30 bits; larger values
 * would spill into neighbouring items - confirm callers guarantee this.
 *
 * Structure:
 *   1. If the request reaches the end of the current group of 32 items, the
 *      first switch jumps into the middle of a fully unrolled do/while loop
 *      (Duff's-device style) and processes whole groups without any per-item
 *      end test.
 *   2. The remaining (< 32) items are handled by the second switch, which
 *      tests for the end after every item.
 * All case labels fall through on purpose.
 *
 * out    - destination word stream (read-modify-write at both edges).
 * offset - index of the first item to write.
 * in     - source items.
 * count  - number of items to pack; 0 is a no-op.
 */
void __PackedArray_pack_30(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /*
   * Nothing to do.  Without this guard the tail switch below would consume an
   * input item before comparing `in` against `end`, never see them equal, and
   * run through every remaining case, overwriting the destination.
   */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)30) / 32;
  startBit = ((uint64_t)offset * (uint64_t)30) % 32;
  /* Keep the bits of the first word that belong to earlier items. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    /* n = number of 32-item group boundaries reached; count becomes the tail length. */
    int32_t n;
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          /* Item fits with room to spare: accumulate, the word is not complete yet. */
          packed |= *in++ << ((0 * 30) % 32);
        case 1:
          /* Item straddles two words: finish the current one, carry the rest over. */
          packed |= *in << ((1 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 30) % 32));
        case 2:
          packed |= *in << ((2 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((2 * 30) % 32));
        case 3:
          packed |= *in << ((3 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((3 * 30) % 32));
        case 4:
          packed |= *in << ((4 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((4 * 30) % 32));
        case 5:
          packed |= *in << ((5 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((5 * 30) % 32));
        case 6:
          packed |= *in << ((6 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 30) % 32));
        case 7:
          packed |= *in << ((7 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((7 * 30) % 32));
        case 8:
          packed |= *in << ((8 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 30) % 32));
        case 9:
          packed |= *in << ((9 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((9 * 30) % 32));
        case 10:
          packed |= *in << ((10 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((10 * 30) % 32));
        case 11:
          packed |= *in << ((11 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((11 * 30) % 32));
        case 12:
          packed |= *in << ((12 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 30) % 32));
        case 13:
          packed |= *in << ((13 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((13 * 30) % 32));
        case 14:
          packed |= *in << ((14 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((14 * 30) % 32));
        case 15:
          /* Item ends exactly on the word boundary: flush and start a fresh word. */
          packed |= *in++ << ((15 * 30) % 32);
          *out++ = packed;
          packed = 0;
        case 16:
          /* Item fits with room to spare: accumulate, the word is not complete yet. */
          packed |= *in++ << ((16 * 30) % 32);
        case 17:
          packed |= *in << ((17 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((17 * 30) % 32));
        case 18:
          packed |= *in << ((18 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((18 * 30) % 32));
        case 19:
          packed |= *in << ((19 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 30) % 32));
        case 20:
          packed |= *in << ((20 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((20 * 30) % 32));
        case 21:
          packed |= *in << ((21 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((21 * 30) % 32));
        case 22:
          packed |= *in << ((22 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((22 * 30) % 32));
        case 23:
          packed |= *in << ((23 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 30) % 32));
        case 24:
          packed |= *in << ((24 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((24 * 30) % 32));
        case 25:
          packed |= *in << ((25 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 30) % 32));
        case 26:
          packed |= *in << ((26 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((26 * 30) % 32));
        case 27:
          packed |= *in << ((27 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((27 * 30) % 32));
        case 28:
          packed |= *in << ((28 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((28 * 30) % 32));
        case 29:
          packed |= *in << ((29 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((29 * 30) % 32));
        case 30:
          packed |= *in << ((30 * 30) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 30) % 32));
        case 31:
          /* Last item of the group ends exactly on the word boundary. */
          packed |= *in++ << ((31 * 30) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The tail starts a new group, hence at bit 0 of a fresh word. */
    offset = 0;
    startBit = 0;
  }
  end = in + count;
  /* Tail: fewer than 32 items left; stop as soon as `count` items were consumed. */
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 30) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 30) % 32));
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 30) % 32));
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 30) % 32));
      if (in == end) break;
    case 4:
      packed |= *in << ((4 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((4 * 30) % 32));
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 30) % 32));
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 30) % 32));
      if (in == end) break;
    case 7:
      packed |= *in << ((7 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((7 * 30) % 32));
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 30) % 32));
      if (in == end) break;
    case 9:
      packed |= *in << ((9 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((9 * 30) % 32));
      if (in == end) break;
    case 10:
      packed |= *in << ((10 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((10 * 30) % 32));
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 30) % 32));
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 30) % 32));
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 30) % 32));
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 30) % 32));
      if (in == end) break;
    case 15:
      packed |= *in++ << ((15 * 30) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
    case 16:
      packed |= *in++ << ((16 * 30) % 32);
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 30) % 32));
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 30) % 32));
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 30) % 32));
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 30) % 32));
      if (in == end) break;
    case 21:
      packed |= *in << ((21 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((21 * 30) % 32));
      if (in == end) break;
    case 22:
      packed |= *in << ((22 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((22 * 30) % 32));
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 30) % 32));
      if (in == end) break;
    case 24:
      packed |= *in << ((24 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((24 * 30) % 32));
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 30) % 32));
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 30) % 32));
      if (in == end) break;
    case 27:
      packed |= *in << ((27 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((27 * 30) % 32));
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 30) % 32));
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 30) % 32));
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 30) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 30) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 30) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  assert(in == end);
  /*
   * The last item ended in the middle of a word: merge the pending low bits
   * with the destination bits above the end position, which belong to later
   * items and must survive.
   */
  if ((count * 30 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)30 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/**
 * Unpacks `count` consecutive 30-bit items from the packed word stream `in`
 * into `out`, one item per uint32_t.
 *
 * Layout read by this function: item i occupies stream bits
 * [i * 30, i * 30 + 30); stream bit b is bit (b % 32) of word (b / 32),
 * least-significant bit first.
 *
 * @param in     base of the packed stream (item 0 starts at bit 0 of in[0]).
 * @param offset index of the first item to unpack.
 * @param out    destination; receives exactly `count` values, each < 2^30.
 * @param count  number of items to unpack; 0 is a no-op that touches neither
 *               `in` nor `out`.
 *
 * The body is a 32-way unrolled switch: 32 items span exactly 30 words, so
 * the bit position of an item inside its word depends only on its "phase"
 * (item index % 32). Every case deliberately falls through to the next one.
 */
void __PackedArray_unpack_30(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below writes an item before its first
     `out == end` test and then never matches `end` again. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)30) / 32;
  packed = *in;
  offset = offset % 32; /* from here on `offset` is the phase of the next item */

  if (count >= 32 - offset)
  {
    /* Bulk phase: enough items to reach the end of the current 32-item group.
       n passes through the unrolled body; the first pass is entered at
       `case offset`, the remaining ones run all 32 cases. */
    int32_t n;
    n = (count + offset) / 32;
    /* Unsigned arithmetic: `32 * n` as int would overflow for count >= 2^31. */
    count -= 32u * (uint32_t)n - offset;
    switch (offset)
    {
      do
      {
        /* Phase 31 ended on a word boundary: fetch the word for phase 0. */
        packed = *++in;
        case 0:
          *out++ = (packed >> ((0 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
        case 1:
        {
          /* Straddling item: low bits come from the current word, the rest
             from the next one; the mask selects the bits taken from `high`. */
          uint32_t low, high;
          low = packed >> ((1 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((1 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((1 * 30) % 32)) << (32 - ((1 * 30) % 32))));
        }
        case 2:
        {
          uint32_t low, high;
          low = packed >> ((2 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((2 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((2 * 30) % 32)) << (32 - ((2 * 30) % 32))));
        }
        case 3:
        {
          uint32_t low, high;
          low = packed >> ((3 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((3 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((3 * 30) % 32)) << (32 - ((3 * 30) % 32))));
        }
        case 4:
        {
          uint32_t low, high;
          low = packed >> ((4 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((4 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((4 * 30) % 32)) << (32 - ((4 * 30) % 32))));
        }
        case 5:
        {
          uint32_t low, high;
          low = packed >> ((5 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((5 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((5 * 30) % 32)) << (32 - ((5 * 30) % 32))));
        }
        case 6:
        {
          uint32_t low, high;
          low = packed >> ((6 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((6 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((6 * 30) % 32)) << (32 - ((6 * 30) % 32))));
        }
        case 7:
        {
          uint32_t low, high;
          low = packed >> ((7 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((7 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((7 * 30) % 32)) << (32 - ((7 * 30) % 32))));
        }
        case 8:
        {
          uint32_t low, high;
          low = packed >> ((8 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((8 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((8 * 30) % 32)) << (32 - ((8 * 30) % 32))));
        }
        case 9:
        {
          uint32_t low, high;
          low = packed >> ((9 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((9 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((9 * 30) % 32)) << (32 - ((9 * 30) % 32))));
        }
        case 10:
        {
          uint32_t low, high;
          low = packed >> ((10 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((10 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((10 * 30) % 32)) << (32 - ((10 * 30) % 32))));
        }
        case 11:
        {
          uint32_t low, high;
          low = packed >> ((11 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((11 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((11 * 30) % 32)) << (32 - ((11 * 30) % 32))));
        }
        case 12:
        {
          uint32_t low, high;
          low = packed >> ((12 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((12 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((12 * 30) % 32)) << (32 - ((12 * 30) % 32))));
        }
        case 13:
        {
          uint32_t low, high;
          low = packed >> ((13 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((13 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((13 * 30) % 32)) << (32 - ((13 * 30) % 32))));
        }
        case 14:
        {
          uint32_t low, high;
          low = packed >> ((14 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((14 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((14 * 30) % 32)) << (32 - ((14 * 30) % 32))));
        }
        case 15:
          /* Item 15 ends exactly on a word boundary (16 * 30 = 15 * 32), so
             the word for item 16 is fetched right after it. */
          *out++ = (packed >> ((15 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
          packed = *++in;
        case 16:
          *out++ = (packed >> ((16 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
        case 17:
        {
          uint32_t low, high;
          low = packed >> ((17 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((17 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((17 * 30) % 32)) << (32 - ((17 * 30) % 32))));
        }
        case 18:
        {
          uint32_t low, high;
          low = packed >> ((18 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((18 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((18 * 30) % 32)) << (32 - ((18 * 30) % 32))));
        }
        case 19:
        {
          uint32_t low, high;
          low = packed >> ((19 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((19 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((19 * 30) % 32)) << (32 - ((19 * 30) % 32))));
        }
        case 20:
        {
          uint32_t low, high;
          low = packed >> ((20 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((20 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((20 * 30) % 32)) << (32 - ((20 * 30) % 32))));
        }
        case 21:
        {
          uint32_t low, high;
          low = packed >> ((21 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((21 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((21 * 30) % 32)) << (32 - ((21 * 30) % 32))));
        }
        case 22:
        {
          uint32_t low, high;
          low = packed >> ((22 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((22 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((22 * 30) % 32)) << (32 - ((22 * 30) % 32))));
        }
        case 23:
        {
          uint32_t low, high;
          low = packed >> ((23 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((23 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((23 * 30) % 32)) << (32 - ((23 * 30) % 32))));
        }
        case 24:
        {
          uint32_t low, high;
          low = packed >> ((24 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((24 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((24 * 30) % 32)) << (32 - ((24 * 30) % 32))));
        }
        case 25:
        {
          uint32_t low, high;
          low = packed >> ((25 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((25 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((25 * 30) % 32)) << (32 - ((25 * 30) % 32))));
        }
        case 26:
        {
          uint32_t low, high;
          low = packed >> ((26 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((26 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((26 * 30) % 32)) << (32 - ((26 * 30) % 32))));
        }
        case 27:
        {
          uint32_t low, high;
          low = packed >> ((27 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((27 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((27 * 30) % 32)) << (32 - ((27 * 30) % 32))));
        }
        case 28:
        {
          uint32_t low, high;
          low = packed >> ((28 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((28 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((28 * 30) % 32)) << (32 - ((28 * 30) % 32))));
        }
        case 29:
        {
          uint32_t low, high;
          low = packed >> ((29 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((29 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((29 * 30) % 32)) << (32 - ((29 * 30) % 32))));
        }
        case 30:
        {
          uint32_t low, high;
          low = packed >> ((30 * 30) % 32);
          packed = *++in;
          high = packed << (32 - ((30 * 30) % 32));
          *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((30 * 30) % 32)) << (32 - ((30 * 30) % 32))));
        }
        case 31:
          *out++ = (packed >> ((31 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The bulk phase stopped on a word boundary; the remaining items start
       at phase 0 of the next word. */
    packed = *++in;
    offset = 0;
  }

  /* Tail phase: fewer items remain than are needed to reach phase 31. Same
     unrolled body, but each case stops as soon as the last item is written,
     before any further word is loaded. */
  end = out + count;
  switch (offset)
  {
    case 0:
      *out++ = (packed >> ((0 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
      if (out == end) break;
    case 1:
    {
      uint32_t low, high;
      low = packed >> ((1 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((1 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((1 * 30) % 32)) << (32 - ((1 * 30) % 32))));
    }
      if (out == end) break;
    case 2:
    {
      uint32_t low, high;
      low = packed >> ((2 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((2 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((2 * 30) % 32)) << (32 - ((2 * 30) % 32))));
    }
      if (out == end) break;
    case 3:
    {
      uint32_t low, high;
      low = packed >> ((3 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((3 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((3 * 30) % 32)) << (32 - ((3 * 30) % 32))));
    }
      if (out == end) break;
    case 4:
    {
      uint32_t low, high;
      low = packed >> ((4 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((4 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((4 * 30) % 32)) << (32 - ((4 * 30) % 32))));
    }
      if (out == end) break;
    case 5:
    {
      uint32_t low, high;
      low = packed >> ((5 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((5 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((5 * 30) % 32)) << (32 - ((5 * 30) % 32))));
    }
      if (out == end) break;
    case 6:
    {
      uint32_t low, high;
      low = packed >> ((6 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((6 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((6 * 30) % 32)) << (32 - ((6 * 30) % 32))));
    }
      if (out == end) break;
    case 7:
    {
      uint32_t low, high;
      low = packed >> ((7 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((7 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((7 * 30) % 32)) << (32 - ((7 * 30) % 32))));
    }
      if (out == end) break;
    case 8:
    {
      uint32_t low, high;
      low = packed >> ((8 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((8 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((8 * 30) % 32)) << (32 - ((8 * 30) % 32))));
    }
      if (out == end) break;
    case 9:
    {
      uint32_t low, high;
      low = packed >> ((9 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((9 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((9 * 30) % 32)) << (32 - ((9 * 30) % 32))));
    }
      if (out == end) break;
    case 10:
    {
      uint32_t low, high;
      low = packed >> ((10 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((10 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((10 * 30) % 32)) << (32 - ((10 * 30) % 32))));
    }
      if (out == end) break;
    case 11:
    {
      uint32_t low, high;
      low = packed >> ((11 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((11 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((11 * 30) % 32)) << (32 - ((11 * 30) % 32))));
    }
      if (out == end) break;
    case 12:
    {
      uint32_t low, high;
      low = packed >> ((12 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((12 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((12 * 30) % 32)) << (32 - ((12 * 30) % 32))));
    }
      if (out == end) break;
    case 13:
    {
      uint32_t low, high;
      low = packed >> ((13 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((13 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((13 * 30) % 32)) << (32 - ((13 * 30) % 32))));
    }
      if (out == end) break;
    case 14:
    {
      uint32_t low, high;
      low = packed >> ((14 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((14 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((14 * 30) % 32)) << (32 - ((14 * 30) % 32))));
    }
      if (out == end) break;
    case 15:
      *out++ = (packed >> ((15 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
      /* Stop before fetching the next word if item 15 was the last one. */
      if (out == end) break;
      packed = *++in;
    case 16:
      *out++ = (packed >> ((16 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
      if (out == end) break;
    case 17:
    {
      uint32_t low, high;
      low = packed >> ((17 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((17 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((17 * 30) % 32)) << (32 - ((17 * 30) % 32))));
    }
      if (out == end) break;
    case 18:
    {
      uint32_t low, high;
      low = packed >> ((18 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((18 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((18 * 30) % 32)) << (32 - ((18 * 30) % 32))));
    }
      if (out == end) break;
    case 19:
    {
      uint32_t low, high;
      low = packed >> ((19 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((19 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((19 * 30) % 32)) << (32 - ((19 * 30) % 32))));
    }
      if (out == end) break;
    case 20:
    {
      uint32_t low, high;
      low = packed >> ((20 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((20 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((20 * 30) % 32)) << (32 - ((20 * 30) % 32))));
    }
      if (out == end) break;
    case 21:
    {
      uint32_t low, high;
      low = packed >> ((21 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((21 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((21 * 30) % 32)) << (32 - ((21 * 30) % 32))));
    }
      if (out == end) break;
    case 22:
    {
      uint32_t low, high;
      low = packed >> ((22 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((22 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((22 * 30) % 32)) << (32 - ((22 * 30) % 32))));
    }
      if (out == end) break;
    case 23:
    {
      uint32_t low, high;
      low = packed >> ((23 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((23 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((23 * 30) % 32)) << (32 - ((23 * 30) % 32))));
    }
      if (out == end) break;
    case 24:
    {
      uint32_t low, high;
      low = packed >> ((24 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((24 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((24 * 30) % 32)) << (32 - ((24 * 30) % 32))));
    }
      if (out == end) break;
    case 25:
    {
      uint32_t low, high;
      low = packed >> ((25 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((25 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((25 * 30) % 32)) << (32 - ((25 * 30) % 32))));
    }
      if (out == end) break;
    case 26:
    {
      uint32_t low, high;
      low = packed >> ((26 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((26 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((26 * 30) % 32)) << (32 - ((26 * 30) % 32))));
    }
      if (out == end) break;
    case 27:
    {
      uint32_t low, high;
      low = packed >> ((27 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((27 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((27 * 30) % 32)) << (32 - ((27 * 30) % 32))));
    }
      if (out == end) break;
    case 28:
    {
      uint32_t low, high;
      low = packed >> ((28 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((28 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((28 * 30) % 32)) << (32 - ((28 * 30) % 32))));
    }
      if (out == end) break;
    case 29:
    {
      uint32_t low, high;
      low = packed >> ((29 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((29 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((29 * 30) % 32)) << (32 - ((29 * 30) % 32))));
    }
      if (out == end) break;
    case 30:
    {
      uint32_t low, high;
      low = packed >> ((30 * 30) % 32);
      packed = *++in;
      high = packed << (32 - ((30 * 30) % 32));
      *out++ = low ^ ((low ^ high) & ((uint32_t)((1ULL << 30) - 1) >> (32 - ((30 * 30) % 32)) << (32 - ((30 * 30) % 32))));
    }
      if (out == end) break;
    case 31:
      *out++ = (packed >> ((31 * 30) % 32)) & (uint32_t)((1ULL << 30) - 1);
      if (out == end) break;
  }
  /* The tail switch must have produced exactly the remaining items. */
  assert(out == end);
}
/**
 * Packs `count` values from `in` as consecutive 31-bit items into the packed
 * word stream `out`, starting at item index `offset`.
 *
 * Layout written by this function: item i occupies stream bits
 * [i * 31, i * 31 + 31); stream bit b is bit (b % 32) of word (b / 32),
 * least-significant bit first. Bits of the first and last touched words that
 * lie outside the written range are preserved.
 *
 * @param out    base of the packed stream (item 0 starts at bit 0 of out[0]).
 * @param offset index of the first item to write.
 * @param in     source values, one per uint32_t. Values are not masked, so
 *               each must fit in 31 bits; a higher bit would be OR-ed into
 *               the following item.
 * @param count  number of items to pack; 0 is a no-op that touches neither
 *               `in` nor `out`.
 *
 * The body is a 32-way unrolled switch: 32 items span exactly 31 words, so
 * the bit position of an item inside its word depends only on its "phase"
 * (item index % 32). Every case deliberately falls through to the next one.
 */
void __PackedArray_pack_31(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  /* Without this guard the tail switch below consumes an item before its
     first `in == end` test and then never matches `end` again. */
  if (count == 0)
    return;

  out += ((uint64_t)offset * (uint64_t)31) / 32;
  startBit = ((uint64_t)offset * (uint64_t)31) % 32;
  /* Keep the bits of the first word that belong to earlier items. */
  packed = *out & (uint32_t)((1ULL << startBit) - 1);
  offset = offset % 32; /* from here on `offset` is the phase of the next item */

  if (count >= 32 - offset)
  {
    /* Bulk phase: enough items to reach the end of the current 32-item group.
       n passes through the unrolled body; the first pass is entered at
       `case offset`, the remaining ones run all 32 cases. */
    int32_t n;
    n = (count + offset) / 32;
    /* Unsigned arithmetic: `32 * n` as int would overflow for count >= 2^31. */
    count -= 32u * (uint32_t)n - offset;
    switch (offset)
    {
      do
      {
        case 0:
          /* Phase 0 fills bits 0..30; the word is not complete yet. */
          packed |= *in++ << ((0 * 31) % 32);
        case 1:
          /* Straddling item: its low bits complete the current word, which
             is stored; its remaining bits seed the next word. */
          packed |= *in << ((1 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((1 * 31) % 32));
        case 2:
          packed |= *in << ((2 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((2 * 31) % 32));
        case 3:
          packed |= *in << ((3 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((3 * 31) % 32));
        case 4:
          packed |= *in << ((4 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((4 * 31) % 32));
        case 5:
          packed |= *in << ((5 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((5 * 31) % 32));
        case 6:
          packed |= *in << ((6 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((6 * 31) % 32));
        case 7:
          packed |= *in << ((7 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((7 * 31) % 32));
        case 8:
          packed |= *in << ((8 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((8 * 31) % 32));
        case 9:
          packed |= *in << ((9 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((9 * 31) % 32));
        case 10:
          packed |= *in << ((10 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((10 * 31) % 32));
        case 11:
          packed |= *in << ((11 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((11 * 31) % 32));
        case 12:
          packed |= *in << ((12 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((12 * 31) % 32));
        case 13:
          packed |= *in << ((13 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((13 * 31) % 32));
        case 14:
          packed |= *in << ((14 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((14 * 31) % 32));
        case 15:
          packed |= *in << ((15 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((15 * 31) % 32));
        case 16:
          packed |= *in << ((16 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((16 * 31) % 32));
        case 17:
          packed |= *in << ((17 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((17 * 31) % 32));
        case 18:
          packed |= *in << ((18 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((18 * 31) % 32));
        case 19:
          packed |= *in << ((19 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((19 * 31) % 32));
        case 20:
          packed |= *in << ((20 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((20 * 31) % 32));
        case 21:
          packed |= *in << ((21 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((21 * 31) % 32));
        case 22:
          packed |= *in << ((22 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((22 * 31) % 32));
        case 23:
          packed |= *in << ((23 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((23 * 31) % 32));
        case 24:
          packed |= *in << ((24 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((24 * 31) % 32));
        case 25:
          packed |= *in << ((25 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((25 * 31) % 32));
        case 26:
          packed |= *in << ((26 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((26 * 31) % 32));
        case 27:
          packed |= *in << ((27 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((27 * 31) % 32));
        case 28:
          packed |= *in << ((28 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((28 * 31) % 32));
        case 29:
          packed |= *in << ((29 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((29 * 31) % 32));
        case 30:
          packed |= *in << ((30 * 31) % 32);
          *out++ = packed;
          packed = *in++ >> (32 - ((30 * 31) % 32));
        case 31:
          /* Phase 31 occupies bits 1..31 and ends exactly on a word
             boundary, so the next word starts empty. */
          packed |= *in++ << ((31 * 31) % 32);
          *out++ = packed;
          packed = 0;
      } while (--n > 0);
    }
    if (count == 0)
      return;
    /* The bulk phase stopped on a word boundary; the remaining items start
       at phase 0, bit 0 of the next word. */
    offset = 0;
    startBit = 0;
  }

  /* Tail phase: fewer items remain than are needed to reach phase 31. Same
     unrolled body, but each case stops once the last item is consumed. */
  end = in + count;
  switch (offset)
  {
    case 0:
      packed |= *in++ << ((0 * 31) % 32);
      if (in == end) break;
    case 1:
      packed |= *in << ((1 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((1 * 31) % 32));
      if (in == end) break;
    case 2:
      packed |= *in << ((2 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((2 * 31) % 32));
      if (in == end) break;
    case 3:
      packed |= *in << ((3 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((3 * 31) % 32));
      if (in == end) break;
    case 4:
      packed |= *in << ((4 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((4 * 31) % 32));
      if (in == end) break;
    case 5:
      packed |= *in << ((5 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((5 * 31) % 32));
      if (in == end) break;
    case 6:
      packed |= *in << ((6 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((6 * 31) % 32));
      if (in == end) break;
    case 7:
      packed |= *in << ((7 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((7 * 31) % 32));
      if (in == end) break;
    case 8:
      packed |= *in << ((8 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((8 * 31) % 32));
      if (in == end) break;
    case 9:
      packed |= *in << ((9 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((9 * 31) % 32));
      if (in == end) break;
    case 10:
      packed |= *in << ((10 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((10 * 31) % 32));
      if (in == end) break;
    case 11:
      packed |= *in << ((11 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((11 * 31) % 32));
      if (in == end) break;
    case 12:
      packed |= *in << ((12 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((12 * 31) % 32));
      if (in == end) break;
    case 13:
      packed |= *in << ((13 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((13 * 31) % 32));
      if (in == end) break;
    case 14:
      packed |= *in << ((14 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((14 * 31) % 32));
      if (in == end) break;
    case 15:
      packed |= *in << ((15 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((15 * 31) % 32));
      if (in == end) break;
    case 16:
      packed |= *in << ((16 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((16 * 31) % 32));
      if (in == end) break;
    case 17:
      packed |= *in << ((17 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((17 * 31) % 32));
      if (in == end) break;
    case 18:
      packed |= *in << ((18 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((18 * 31) % 32));
      if (in == end) break;
    case 19:
      packed |= *in << ((19 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((19 * 31) % 32));
      if (in == end) break;
    case 20:
      packed |= *in << ((20 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((20 * 31) % 32));
      if (in == end) break;
    case 21:
      packed |= *in << ((21 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((21 * 31) % 32));
      if (in == end) break;
    case 22:
      packed |= *in << ((22 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((22 * 31) % 32));
      if (in == end) break;
    case 23:
      packed |= *in << ((23 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((23 * 31) % 32));
      if (in == end) break;
    case 24:
      packed |= *in << ((24 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((24 * 31) % 32));
      if (in == end) break;
    case 25:
      packed |= *in << ((25 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((25 * 31) % 32));
      if (in == end) break;
    case 26:
      packed |= *in << ((26 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((26 * 31) % 32));
      if (in == end) break;
    case 27:
      packed |= *in << ((27 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((27 * 31) % 32));
      if (in == end) break;
    case 28:
      packed |= *in << ((28 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((28 * 31) % 32));
      if (in == end) break;
    case 29:
      packed |= *in << ((29 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((29 * 31) % 32));
      if (in == end) break;
    case 30:
      packed |= *in << ((30 * 31) % 32);
      *out++ = packed;
      packed = *in++ >> (32 - ((30 * 31) % 32));
      if (in == end) break;
    case 31:
      packed |= *in++ << ((31 * 31) % 32);
      *out++ = packed;
      packed = 0;
      if (in == end) break;
  }
  /* The tail switch must have consumed exactly the remaining items. */
  assert(in == end);

  /* If the last item ended inside a word, `packed` holds only its low bits:
     merge them with the untouched high bits already in *out and store. */
  if ((count * 31 + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)31 + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}
/*
 * Unpacks `count` consecutive 31-bit items from the packed word stream `in`,
 * starting at item index `offset`, writing one item per 32-bit element of
 * `out`.
 *
 * Layout: items are stored back to back, least-significant bit first, so a
 * group of 32 items occupies exactly 31 words. Within a group, item 0 sits in
 * the low 31 bits of the first word, item 31 in the high 31 bits of the last
 * word, and every item in between straddles two adjacent words.
 *
 * in     - packed words; item 0 starts at bit 0 of in[0]
 * offset - index of the first item to extract
 * out    - receives `count` values, each in the range [0, 2^31)
 * count  - number of items to extract; 0 is a no-op that touches no buffer
 *
 * Only the words that actually hold bits of the requested items are read.
 */
void __PackedArray_unpack_31(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t itemMask = (uint32_t)((1ULL << 31) - 1);
  uint32_t packed;
  uint32_t slot;

  /* An empty request must not read or write anything: without this guard
     the extraction loop below would run with no terminating condition. */
  if (count == 0)
    return;

  in += ((uint64_t)offset * (uint64_t)31) / 32;
  packed = *in;
  slot = offset % 32; /* position of the current item inside its 32-item group */

  for (;;)
  {
    if (slot == 0)
    {
      /* Entirely inside the current word, starting at bit 0. */
      *out++ = packed & itemMask;
    }
    else if (slot == 31)
    {
      /* Entirely inside the current word, starting at bit 1. */
      *out++ = (packed >> 1) & itemMask;
    }
    else
    {
      /* Item `slot` starts at bit (32 - slot): its low `slot` bits are the
         top of the current word, the remaining bits are the bottom of the
         next word. */
      uint32_t low, high, fromNext;

      low = packed >> (32 - slot);
      packed = *++in;
      high = packed << slot;
      fromNext = (itemMask >> slot) << slot; /* bits slot..30 come from `high` */
      *out++ = low ^ ((low ^ high) & fromNext);
    }

    if (--count == 0)
      break;

    if (++slot == 32)
    {
      /* Group finished: the next group begins on a fresh word. */
      slot = 0;
      packed = *++in;
    }
  }
}
/*
 * Stores `count` 32-bit items from `in` into the packed word stream `out`,
 * starting at item index `offset`.
 *
 * A 32-bit item fills exactly one word, so no shifting or merging with
 * neighbouring items is needed: item k of `in` becomes out[offset + k].
 *
 * out    - packed destination words
 * offset - index of the first item to overwrite
 * in     - source values, one per element
 * count  - number of items to store; 0 is a no-op that touches no buffer
 *
 * Destination words are only written, never read, so the target range may be
 * uninitialized.
 */
void __PackedArray_pack_32(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  const uint32_t* end;

  /* An empty request must leave `out` untouched; the copy loop below
     assumes at least one item. */
  if (count == 0)
    return;

  out += offset;
  end = in + count;

  do
  {
    *out++ = *in++;
  } while (in != end);
}
/*
 * Extracts `count` 32-bit items from the packed word stream `in`, starting at
 * item index `offset`, into `out`.
 *
 * A 32-bit item fills exactly one word, so item k of the result is simply
 * in[offset + k].
 *
 * in     - packed source words
 * offset - index of the first item to extract
 * out    - receives `count` values
 * count  - number of items to extract; 0 is a no-op that touches no buffer
 *
 * Exactly `count` source words are read.
 */
void __PackedArray_unpack_32(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  const uint32_t* end;

  /* An empty request must not read `in` or write `out`; the copy loop below
     assumes at least one item. */
  if (count == 0)
    return;

  in += offset;
  end = out + count;

  do
  {
    *out++ = *in++;
  } while (out != end);
}
#include <stdlib.h> | |
#include <stddef.h> | |
PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count) | |
{ | |
PackedArray* a; | |
size_t bufferSize; | |
assert(bitsPerItem > 0); | |
assert(bitsPerItem <= 32); | |
bufferSize = sizeof(uint32_t) * (((uint64_t)bitsPerItem * (uint64_t)count + 31) / 32); | |
a = (PackedArray*)malloc(sizeof(PackedArray) + bufferSize); | |
if (a != NULL) | |
{ | |
a->buffer[((uint64_t)bitsPerItem * (uint64_t)count + 31) / 32 - 1] = 0; | |
a->bitsPerItem = bitsPerItem; | |
a->count = count; | |
} | |
return a; | |
} | |
/*
 * Releases `a` with free().
 * Passing NULL is treated as a caller error (checked with assert only).
 */
void PackedArray_destroy(PackedArray* a)
{
  assert(a != NULL);

  free(a);
}
/*
 * Packs `count` values read from `in` into `a`, starting at item index `offset`.
 *
 * The work is forwarded to the routine specialized for a->bitsPerItem.
 * Nothing happens when bitsPerItem is outside [1, 32].
 */
void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count)
{
  uint32_t* words;

  assert(a != NULL);
  assert(in != NULL);

  words = a->buffer;

  switch (a->bitsPerItem)
  {
    case 1:  __PackedArray_pack_1(words, offset, in, count);  break;
    case 2:  __PackedArray_pack_2(words, offset, in, count);  break;
    case 3:  __PackedArray_pack_3(words, offset, in, count);  break;
    case 4:  __PackedArray_pack_4(words, offset, in, count);  break;
    case 5:  __PackedArray_pack_5(words, offset, in, count);  break;
    case 6:  __PackedArray_pack_6(words, offset, in, count);  break;
    case 7:  __PackedArray_pack_7(words, offset, in, count);  break;
    case 8:  __PackedArray_pack_8(words, offset, in, count);  break;
    case 9:  __PackedArray_pack_9(words, offset, in, count);  break;
    case 10: __PackedArray_pack_10(words, offset, in, count); break;
    case 11: __PackedArray_pack_11(words, offset, in, count); break;
    case 12: __PackedArray_pack_12(words, offset, in, count); break;
    case 13: __PackedArray_pack_13(words, offset, in, count); break;
    case 14: __PackedArray_pack_14(words, offset, in, count); break;
    case 15: __PackedArray_pack_15(words, offset, in, count); break;
    case 16: __PackedArray_pack_16(words, offset, in, count); break;
    case 17: __PackedArray_pack_17(words, offset, in, count); break;
    case 18: __PackedArray_pack_18(words, offset, in, count); break;
    case 19: __PackedArray_pack_19(words, offset, in, count); break;
    case 20: __PackedArray_pack_20(words, offset, in, count); break;
    case 21: __PackedArray_pack_21(words, offset, in, count); break;
    case 22: __PackedArray_pack_22(words, offset, in, count); break;
    case 23: __PackedArray_pack_23(words, offset, in, count); break;
    case 24: __PackedArray_pack_24(words, offset, in, count); break;
    case 25: __PackedArray_pack_25(words, offset, in, count); break;
    case 26: __PackedArray_pack_26(words, offset, in, count); break;
    case 27: __PackedArray_pack_27(words, offset, in, count); break;
    case 28: __PackedArray_pack_28(words, offset, in, count); break;
    case 29: __PackedArray_pack_29(words, offset, in, count); break;
    case 30: __PackedArray_pack_30(words, offset, in, count); break;
    case 31: __PackedArray_pack_31(words, offset, in, count); break;
    case 32: __PackedArray_pack_32(words, offset, in, count); break;
    default:
      /* unsupported width: leave the array untouched */
      break;
  }
}
void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count) | |
{ | |
assert(a != NULL); | |
assert(out != NULL); | |
switch (a->bitsPerItem) | |
{ | |
case 1: __PackedArray_unpack_1(a->buffer, offset, out, count); break; | |
case 2: __PackedArray_unpack_2(a->buffer, offset, out, count); break; | |
case 3: __PackedArray_unpack_3(a->buffer, offset, out, count); break; | |
case 4: __PackedArray_unpack_4(a->buffer, offset, out, count); break; | |
case 5: __PackedArray_unpack_5(a->buffer, offset, out, count); break; | |
case 6: __PackedArray_unpack_6(a->buffer, offset, out, count); break; | |
case 7: __PackedArray_unpack_7(a->buffer, offset, out, count); break; | |
case 8: __PackedArray_unpack_8(a->buffer, offset, out, count); break; | |
case 9: __PackedArray_unpack_9(a->buffer, offset, out, count); break; | |
case 10: __PackedArray_unpack_10(a->buffer, offset, out, count); break; | |
case 11: __PackedArray_unpack_11(a->buffer, offset, out, count); break; | |
case 12: __PackedArray_unpack_12(a->buffer, offset, out, count); break; | |
case 13: __PackedArray_unpack_13(a->buffer, offset, out, count); break; | |
case 14: __PackedArray_unpack_14(a->buffer, offset, out, count); break; | |
case 15: __PackedArray_unpack_15(a->buffer, offset, out, count); break; | |
case 16: __PackedArray_unpack_16(a->buffer, offset, out, count); break; | |
case 17: __PackedArray_unpack_17(a->buffer, offset, out, count); break; | |
case 18: __PackedArray_unpack_18(a->buffer, offset, out, count); break; | |
case 19: __PackedArray_unpack_19(a->buffer, offset, out, count); break; | |
case 20: __PackedArray_unpack_20(a->buffer, offset, out, count); break; | |
case 21: __PackedArray_unpack_21(a->buffer, offset, out, count); break; | |
case 22: __PackedArray_unpack_22(a->buffer, offset, out, count); break; | |
case 23: __PackedArray_unpack_23(a->buffer, offset, out, count); break; | |
case 24: __PackedArray_unpack_24(a->buffer, offset, out, count); break; | |
case 25: __PackedArray_unpack_25(a->buffer, offset, out, count); break; | |
case 26: __PackedArray_unpack_26(a->buffer, offset, out, count); break; | |
case 27: __PackedArray_unpack_27(a->buffer, offset, out, count); break; | |
case 28: __PackedArray_unpack_28(a->buffer, offset, out, count); break; | |
case 29: __PackedArray_unpack_29(a->buffer, offset, out, count); break; | |
case 30: __PackedArray_unpack_30(a->buffer, offset, out, count); break; | |
case 31: __PackedArray_unpack_31(a->buffer, offset, out, count); break; | |
case 32: __PackedArray_unpack_32(a->buffer, offset, out, count); break; | |
} | |
} | |
/*
 * Stores `in` as item number `offset` of `a`.
 *
 * `in` must fit in a->bitsPerItem bits; this is checked with assert only.
 * An item may straddle two consecutive 32-bit words, in which case both are updated.
 */
void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in)
{
  uint32_t* __restrict word;
  uint64_t bitPosition;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;

  assert(a != NULL);

  width = a->bitsPerItem;
  bitPosition = (uint64_t)offset * (uint64_t)width;
  word = &a->buffer[bitPosition / 32];
  shift = (uint32_t)(bitPosition % 32);
  /* bits left in the first word, counting from the item's first bit */
  room = 32 - shift;

  /* for width == 32 the truncated shift yields 0 and the subtraction wraps to all ones */
  mask = (uint32_t)(1ULL << width) - 1;
  assert(0 == (~mask & in));

  /* the low part of the item always lands in the first word */
  word[0] = (word[0] & ~(mask << shift)) | (in << shift);

  if (width > room)
  {
    /* the item straddles: its remaining high bits go to the bottom of the next word */
    word[1] = (word[1] & ~(mask >> room)) | (in >> room);
  }
}
uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset) | |
{ | |
const uint32_t* __restrict in; | |
uint32_t bitsPerItem; | |
uint32_t startBit; | |
uint32_t bitsAvailable; | |
uint32_t mask; | |
uint32_t out; | |
assert(a != NULL); | |
bitsPerItem = a->bitsPerItem; | |
in = &a->buffer[((uint64_t)offset * (uint64_t)bitsPerItem) / 32]; | |
startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32; | |
bitsAvailable = 32 - startBit; | |
mask = (uint32_t)(1ULL << bitsPerItem) - 1; | |
if (bitsPerItem <= bitsAvailable) | |
{ | |
out = (in[0] >> startBit) & mask; | |
} | |
else | |
{ | |
uint32_t low, high; | |
low = in[0] >> startBit; | |
high = in[1] << (32 - startBit); | |
out = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable)); | |
} | |
return out; | |
} | |
uint32_t PackedArray_bufferSize(const PackedArray* a) | |
{ | |
assert(a != NULL); | |
return (uint32_t)(((uint64_t)a->bitsPerItem * (uint64_t)a->count + 31) / 32); | |
} |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "PackedArray.h" | |
#include <assert.h> | |
#include <stddef.h> | |
/*
 * Packs `count` values from `in` into `buffer`, one item at a time, starting
 * at item index `offset`.
 *
 * Layout: item i lives in 32-bit lane (i % 4) of a group of four words, at bit
 * ((i / 4) * bitsPerItem) % 32; an item that does not fit in its word
 * continues in the same lane of the next group, i.e. four words further.
 *
 * `mask` is the bit mask of one item; every value must fit in it (checked
 * with assert only).
 */
static void __PackedArray_pack_scalar(uint32_t* buffer, uint32_t bitsPerItem, uint32_t mask, uint32_t offset, const uint32_t* in, uint32_t count)
{
  uint32_t* __restrict lane;
  uint32_t i;

  assert(buffer != NULL);
  assert(in != NULL);

  for (i = 0; i < count; ++i, ++offset)
  {
    const uint32_t value = in[i];
    /* bit position of the item inside its lane */
    const uint64_t bit = (uint64_t)(offset / 4) * (uint64_t)bitsPerItem;
    const uint32_t shift = (uint32_t)(bit % 32);
    /* bits left in the current word, counting from the item's first bit */
    const uint32_t room = 32 - shift;

    assert(0 == (~mask & value));

    lane = &buffer[bit / 32 * 4 + offset % 4];

    /* the low part of the item always lands in the current word */
    lane[0] = (lane[0] & ~(mask << shift)) | (value << shift);

    if (bitsPerItem > room)
    {
      /* remaining high bits go to the same lane of the next group */
      lane[4] = (lane[4] & ~(mask >> room)) | (value >> room);
    }
  }
}
/*
 * Unpacks `count` items from `buffer` into `out`, one item at a time,
 * starting at item index `offset`.
 *
 * Layout: item i lives in 32-bit lane (i % 4) of a group of four words, at bit
 * ((i / 4) * bitsPerItem) % 32; an item that does not fit in its word
 * continues in the same lane of the next group, i.e. four words further.
 *
 * `mask` is the bit mask of one item.
 */
static void __PackedArray_unpack_scalar(const uint32_t* buffer, uint32_t bitsPerItem, uint32_t mask, uint32_t offset, uint32_t* out, uint32_t count)
{
  const uint32_t* __restrict lane;
  uint32_t i;

  assert(buffer != NULL);
  assert(out != NULL);

  for (i = 0; i < count; ++i, ++offset)
  {
    /* bit position of the item inside its lane */
    const uint64_t bit = (uint64_t)(offset / 4) * (uint64_t)bitsPerItem;
    const uint32_t shift = (uint32_t)(bit % 32);
    /* bits left in the current word, counting from the item's first bit */
    const uint32_t room = 32 - shift;

    lane = &buffer[bit / 32 * 4 + offset % 4];

    if (bitsPerItem <= room)
    {
      out[i] = (lane[0] >> shift) & mask;
    }
    else
    {
      /* straddling item: low bits from this word, the rest from the same lane of the next group */
      const uint32_t low = lane[0] >> shift;
      const uint32_t high = lane[4] << room;
      /* the part of the item mask that lies at or above bit `room` */
      const uint32_t upperBits = mask >> room << room;

      out[i] = (low & ~upperBits) | (high & upperBits);
    }
  }
}
#include <emmintrin.h> | |
/*
 * Packs `count` 1-bit values from `in` into `buffer`, starting at item index
 * `offset`, handling four items at a time (one per 32-bit lane of a 128-bit
 * SSE2 register).
 *
 * Items located before the next multiple-of-4 offset (`pre`) and the up-to-3
 * items left after the last full group of four (`post`) are written with
 * __PackedArray_pack_scalar. The vector path ORs the input values in without
 * masking them.
 */
void __PackedArray_pack_1(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
/* number of items needed to reach the next multiple of 4, clamped to count */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 1, (uint32_t)((1ULL << 1) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
/* items that do not fill a whole group of four are left for the scalar tail */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* group of four words holding the current item, and the bit every lane starts at */
out = &buffer[(offset / 4 * 1) / 32 * 4]; | |
startBit = (offset / 4 * 1) % 32; | |
/* start from the destination, keeping only the bits below startBit in every lane */
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
/* position inside the current 128-item block; offset is advanced now for the scalar tail */
offset_4 = offset % 128; | |
offset += count; | |
/* enough items to complete at least one whole 128-item block */
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
/* n blocks get completed; count becomes what remains after them */
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* jump into the unrolled loop body at the step matching the starting bit; the cases fall through on purpose */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 1) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 1) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 1) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 1) % 32))); | |
in += 4; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 1) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 1) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 1) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 1) % 32))); | |
in += 4; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 1) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 1) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 1) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 1) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 1) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 1) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 1) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 1) % 32))); | |
in += 4; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 1) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 1) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 1) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 1) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 1) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 1) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 1) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 1) % 32))); | |
in += 4; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 1) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 1) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 1) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 1) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 1) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 1) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 1) % 32))); | |
in += 4; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 1) % 32))); | |
in += 4; | |
/* all 32 bits of every lane are filled: write the block out and start a fresh one */
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
/* no items left for a partial block: only the scalar tail remains */
if (count == 0) | |
goto __PackedArray_pack_1_post; | |
/* the partial block that follows starts at bit 0 of a fresh group */
offset_4 = 0; | |
startBit = 0; | |
} | |
/* the remaining items fill only part of a block: same steps, leaving as soon as the input is consumed */
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 1) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 1) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
assert(in == end); | |
/* the last item written does not end on a word boundary: merge with the destination bits above it and store */
if ((count / 4 * 1 + startBit) % 32) | |
{ | |
in_4 = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 1 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
/* scalar tail: the items that did not fill a group of four */
__PackedArray_pack_1_post: | |
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 1, (uint32_t)((1ULL << 1) - 1), offset, in, post); | |
} | |
void __PackedArray_unpack_1(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 1, (uint32_t)((1ULL << 1) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
in = &buffer[(offset / 4 * 1) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = offset % 128; | |
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_unpack_1_post; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
end = out + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 1) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 1) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
__PackedArray_unpack_1_post: | |
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 1, (uint32_t)((1ULL << 1) - 1), offset, out, post); | |
} | |
/*
 * Packs `count` values read from `in` into `buffer`, starting at element
 * index `offset`, using 2 bits per value.
 *
 * The work is split into three parts:
 *   - "pre":  a scalar call that handles values until `offset` is a multiple
 *             of 4 (or until `count` is exhausted);
 *   - a 128-bit SIMD part that consumes whole groups of 4 values: each group
 *     is loaded as one __m128i and OR-ed into `packed` after shifting every
 *     32-bit lane left by (group * 2) % 32;
 *   - "post": a scalar call that handles the last count % 4 values.
 *
 * NOTE(review): the SIMD part ORs the inputs in without masking them, so it
 * presumably relies on every input value fitting in 2 bits - confirm with
 * the callers.
 */
void __PackedArray_pack_2(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
/* Number of values needed to round offset up to a multiple of 4, clamped to count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 2, (uint32_t)((1ULL << 2) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
/* Values that do not fill a whole group of 4 are left for the scalar call at the end. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* offset / 4 is the group index; each group takes 2 bits in every 32-bit lane. */
/* out points at the 4-word block holding that group, startBit is the bit position inside each lane. */
out = &buffer[(offset / 4 * 2) / 32 * 4]; | |
startBit = (offset / 4 * 2) % 32; | |
/* Keep only the bits below startBit of the current block; the bits from startBit up are rebuilt below. */
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
/* Position inside the 128-value period covered by the 32 unrolled cases (two 4-word blocks). */
offset_4 = offset % 128; | |
/* offset is advanced now so that the final scalar call starts right after the SIMD part. */
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
/* Enough values to reach the end of the current 128-value period at least once. */
/* n is the number of passes through the unrolled body; count becomes what is left after them. */
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the do/while: the first pass starts at */
/* case offset_4 / 4, every later pass runs all 32 cases. Cases fall through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 2) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 2) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 2) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 2) % 32))); | |
in += 4; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 2) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 2) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 2) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 2) % 32))); | |
in += 4; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 2) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 2) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 2) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 2) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 2) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 2) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 2) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 2) % 32))); | |
in += 4; | |
/* 16 groups of 2 bits fill every 32-bit lane: write the block out and start an empty one. */
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 2) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 2) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 2) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 2) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 2) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 2) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 2) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 2) % 32))); | |
in += 4; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 2) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 2) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 2) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 2) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 2) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 2) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 2) % 32))); | |
in += 4; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 2) % 32))); | |
in += 4; | |
/* Second block of the period is full as well: write it out and start an empty one. */
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
/* Nothing left for the SIMD tail: go straight to the scalar post step. */
if (count == 0) | |
goto __PackedArray_pack_2_post; | |
/* The unrolled loop always ends on a block boundary, so the tail starts at case 0 with an empty block. */
offset_4 = 0; | |
startBit = 0; | |
} | |
/* Tail: same per-group steps as above, but each case stops as soon as `in` reaches `end`. */
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 2) % 32))); | |
in += 4; | |
/* The block is full: it is stored before the end check, so a tail ending here leaves nothing pending. */
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 2) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 2) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
assert(in == end); | |
/* A non-zero remainder means `packed` holds a partially filled block that was not stored yet. */
if ((count / 4 * 2 + startBit) % 32) | |
{ | |
/* Merge with the block already in memory: mask covers the bits written here, */
/* andnot keeps the existing bits above them, and the OR adds the new low bits. */
in_4 = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 2 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
__PackedArray_pack_2_post: | |
/* Scalar call for the last count % 4 values; offset and in already point past the SIMD part. */
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 2, (uint32_t)((1ULL << 2) - 1), offset, in, post); | |
} | |
/*
 * Unpacks `count` 2-bit values from `buffer`, starting at element index
 * `offset`, and writes them to `out` as uint32_t.
 *
 * The work is split into three parts:
 *   - "pre":  a scalar call that handles values until `offset` is a multiple
 *             of 4 (or until `count` is exhausted);
 *   - a 128-bit SIMD part that produces whole groups of 4 values: for each
 *     group the current block is shifted right by (group * 2) % 32 in every
 *     32-bit lane, masked with (1 << 2) - 1 and stored to `out`;
 *   - "post": a scalar call that handles the last count % 4 values.
 */
void __PackedArray_unpack_2(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
/* Number of values needed to round offset up to a multiple of 4, clamped to count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 2, (uint32_t)((1ULL << 2) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
/* Values that do not fill a whole group of 4 are left for the scalar call at the end. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* offset / 4 is the group index; each group takes 2 bits in every 32-bit lane. */
/* in points at the 4-word block holding that group, which is loaded once up front. */
in = &buffer[(offset / 4 * 2) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
/* Position inside the 128-value period covered by the 32 unrolled cases (two 4-word blocks). */
offset_4 = offset % 128; | |
/* offset is advanced now so that the final scalar call starts right after the SIMD part. */
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
/* Enough values to reach the end of the current 128-value period at least once. */
/* n is the number of passes through the unrolled body; count becomes what is left after them. */
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the do/while: the first pass starts at */
/* case offset_4 / 4, every later pass runs all 32 cases. Cases fall through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
/* Only reached on the second and later passes (the switch jumps past it): load the next block. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* 16 groups of 2 bits use up every 32-bit lane: move on to the next block. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
/* Nothing left for the SIMD tail: skip the next-block load and go to the scalar post step. */
if (count == 0) | |
goto __PackedArray_unpack_2_post; | |
/* The unrolled loop always ends on a block boundary, so the tail starts at case 0 of the next block. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
/* Tail: same per-group steps as above, but each case stops as soon as `out` reaches `end`. */
end = out + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* The end check comes before the reload, so the next block is only loaded if more values remain. */
if (out == end) break; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 2) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 2) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
__PackedArray_unpack_2_post: | |
/* Scalar call for the last count % 4 values; offset and out already point past the SIMD part. */
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 2, (uint32_t)((1ULL << 2) - 1), offset, out, post); | |
} | |
/*
 * Packs `count` values read from `in` into `buffer`, starting at element
 * index `offset`, using a 3-bit stride per value.
 *
 * The work is split in three parts:
 *   - `pre`  : leading elements up to the next multiple-of-4 element index,
 *              handed to __PackedArray_pack_scalar;
 *   - middle : groups of 4 elements processed with SSE2 intrinsics. Each
 *              group of 4 consecutive inputs is loaded as one vector and
 *              shifted to the same bit position in the four 32-bit lanes;
 *   - `post` : the trailing `count % 4` elements, again handed to
 *              __PackedArray_pack_scalar.
 *
 * 32 groups of 3 bits fill exactly three 32-bit words per lane, which is why
 * the unrolled code below has 32 cases (128 elements per full round).
 *
 * NOTE(review): input values are OR-ed in without being masked here, so this
 * presumably assumes every value already fits in 3 bits -- confirm with callers.
 */
void __PackedArray_pack_3(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
/* Number of elements needed to bring `offset` up to a multiple of 4, clamped to `count`. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 3, (uint32_t)((1ULL << 3) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
/* Leftover elements that do not form a full group of 4; `count` becomes a multiple of 4. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* `offset / 4` is the group index; each group takes 3 bits per lane. */
/* `out` is the 4-word vector holding the first bit to write, `startBit` its position within each lane. */
out = &buffer[(offset / 4 * 3) / 32 * 4]; | |
startBit = (offset / 4 * 3) % 32; | |
/* Keep only the bits below `startBit` that are already stored in the buffer. */
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
/* Element position inside the current 128-element round; selects the entry case below. */
offset_4 = offset % 128; | |
/* Advance `offset` past the vectorized part now so the `post` call starts at the right element. */
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
/* Enough data to reach the end of the current round at least once. */
/* `n` is the number of passes through the loop body; the first pass starts at case `offset_4 / 4`. */
/* `count` is reduced to what remains after those passes. */
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the unrolled do/while body (Duff's-device style); */
/* every case falls through to the next one. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 3) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 3) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 3) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 3) % 32))); | |
in += 4; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 3) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 3) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 3) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 3) % 32))); | |
in += 4; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 3) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 3) % 32))); | |
in += 4; | |
/* Case 10: the field starts at bit 30 and straddles the word boundary. */
/* Store the completed vector, then seed the next one with the field's remaining high bit. */
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 3) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((10 * 3) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 3) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 3) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 3) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 3) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 3) % 32))); | |
in += 4; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 3) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 3) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 3) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 3) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 3) % 32))); | |
in += 4; | |
/* Case 21: the field starts at bit 31 and straddles the word boundary. */
/* Store the completed vector, then seed the next one with the field's remaining two high bits. */
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 3) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((21 * 3) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 3) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 3) % 32))); | |
in += 4; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 3) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 3) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 3) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 3) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 3) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 3) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 3) % 32))); | |
in += 4; | |
/* Case 31: the field occupies bits 29..31 and ends exactly at the word boundary. */
/* Store the vector and restart the next round from an all-zero accumulator. */
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 3) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_pack_3_post; | |
/* The loop always exits after case 31, so the remainder starts at case 0, bit 0 of a fresh vector. */
offset_4 = 0; | |
startBit = 0; | |
} | |
/* Tail: fewer elements than needed to finish the round. Same steps as above, */
/* but each case checks whether the last group has been consumed. */
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
/* Straddling field (bit 30): flush the full vector and carry the high bit over. */
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 3) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((10 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
/* Straddling field (bit 31): flush the full vector and carry the two high bits over. */
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 3) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((21 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 3) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 3) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
/* The tail must have consumed exactly the remaining `count` inputs. */
assert(in == end); | |
/* If the last written bit is not at a word boundary, `packed` is a partial vector: */
/* merge it with the bits already in the buffer above the last written position. */
if ((count / 4 * 3 + startBit) % 32) | |
{ | |
in_4 = _mm_loadu_si128((const __m128i*)out); | |
/* `mask` has one bit set for every bit position written so far in the current word. */
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 3 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
/* andnot computes (~mask & in_4): only the pre-existing higher bits are kept. */
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
/* Trailing elements (fewer than 4) go through the scalar helper; */
/* `offset` and `in` were advanced past the vectorized part above. */
__PackedArray_pack_3_post: | |
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 3, (uint32_t)((1ULL << 3) - 1), offset, in, post); | |
} | |
/*
 * Unpacks `count` values from `buffer`, starting at element index `offset`,
 * into `out`, reading 3-bit fields.
 *
 * The work is split in three parts:
 *   - `pre`  : leading elements up to the next multiple-of-4 element index,
 *              handed to __PackedArray_unpack_scalar;
 *   - middle : groups of 4 elements extracted with SSE2 intrinsics. One
 *              128-bit load yields four 32-bit lanes; shifting and masking
 *              each lane produces 4 consecutive output values at once;
 *   - `post` : the trailing `count % 4` elements, again handed to
 *              __PackedArray_unpack_scalar.
 *
 * 32 groups of 3 bits span exactly three 32-bit words per lane, which is why
 * the unrolled code below has 32 cases (128 elements per full round).
 */
void __PackedArray_unpack_3(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
/* Number of elements needed to bring `offset` up to a multiple of 4, clamped to `count`. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 3, (uint32_t)((1ULL << 3) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
/* Leftover elements that do not form a full group of 4; `count` becomes a multiple of 4. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* `offset / 4` is the group index; each group takes 3 bits per lane. */
/* `in` is the 4-word vector holding the first bit to read. */
in = &buffer[(offset / 4 * 3) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
/* Element position inside the current 128-element round; selects the entry case below. */
offset_4 = offset % 128; | |
/* Advance `offset` past the vectorized part now so the `post` call starts at the right element. */
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
/* Enough data to reach the end of the current round at least once. */
/* `n` is the number of passes through the loop body; the first pass starts at case `offset_4 / 4`. */
/* `count` is reduced to what remains after those passes. */
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the unrolled do/while body (Duff's-device style); */
/* every case falls through to the next one. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
/* Executed only on passes after the first (the switch jumps past these two lines): */
/* move on to the vector that follows the one finished by case 31. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Case 10: the field starts at bit 30 and straddles the word boundary. */
/* `low` holds its two low bits from the current vector; the next vector is loaded */
/* and `high`/`mask` supply the remaining bit (bit 2 of the result). */
case 10: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((10 * 3) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((10 * 3) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 3) - 1)), (32 - ((10 * 3) % 32))), (32 - ((10 * 3) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Case 21: the field starts at bit 31 and straddles the word boundary. */
/* `low` holds its lowest bit from the current vector; the next vector is loaded */
/* and `high`/`mask` supply the remaining two bits (bits 1..2 of the result). */
case 21: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((21 * 3) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((21 * 3) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 3) - 1)), (32 - ((21 * 3) % 32))), (32 - ((21 * 3) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Case 31: the field occupies bits 29..31, the last field of the current vector. */
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_unpack_3_post; | |
/* The loop always exits after case 31, so the remainder starts at case 0 of the next vector. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
/* Tail: fewer elements than needed to finish the round. Same steps as above, */
/* but each case checks whether the last group has been produced. */
end = out + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
/* Straddling field (bit 30): combine two bits of the current vector with one bit of the next. */
case 10: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((10 * 3) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((10 * 3) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 3) - 1)), (32 - ((10 * 3) % 32))), (32 - ((10 * 3) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
/* Straddling field (bit 31): combine one bit of the current vector with two bits of the next. */
case 21: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((21 * 3) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((21 * 3) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 3) - 1)), (32 - ((21 * 3) % 32))), (32 - ((21 * 3) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 3) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 3) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
/* The tail must have produced exactly the remaining `count` outputs. */
assert(out == end); | |
} | |
/* Trailing elements (fewer than 4) go through the scalar helper; */
/* `offset` and `out` were advanced past the vectorized part above. */
__PackedArray_unpack_3_post: | |
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 3, (uint32_t)((1ULL << 3) - 1), offset, out, post); | |
} | |
void __PackedArray_pack_4(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 4, (uint32_t)((1ULL << 4) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
out = &buffer[(offset / 4 * 4) / 32 * 4]; | |
startBit = (offset / 4 * 4) % 32; | |
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
offset_4 = offset % 128; | |
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 4) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 4) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 4) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 4) % 32))); | |
in += 4; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 4) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 4) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 4) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 4) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 4) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 4) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 4) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 4) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 4) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 4) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 4) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 4) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 4) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 4) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 4) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 4) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 4) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 4) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 4) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 4) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 4) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 4) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 4) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 4) % 32))); | |
in += 4; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_pack_4_post; | |
offset_4 = 0; | |
startBit = 0; | |
} | |
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 4) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 4) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
assert(in == end); | |
if ((count / 4 * 4 + startBit) % 32) | |
{ | |
in_4 = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 4 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
__PackedArray_pack_4_post: | |
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 4, (uint32_t)((1ULL << 4) - 1), offset, in, post); | |
} | |
void __PackedArray_unpack_4(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 4, (uint32_t)((1ULL << 4) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
in = &buffer[(offset / 4 * 4) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = offset % 128; | |
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_unpack_4_post; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
end = out + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 4) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 4) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
__PackedArray_unpack_4_post: | |
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 4, (uint32_t)((1ULL << 4) - 1), offset, out, post); | |
} | |
/*
 * Packs `count` values read from `in` into `buffer` at 5 bits per value,
 * starting at element index `offset`, using 128-bit SSE2 integer operations.
 *
 * Layout as implemented below: values are processed in groups of 4 (one value
 * per 32-bit lane). Group g = element_index / 4 lives at bit (g * 5) % 32 of
 * every lane of the 4-word block that starts at buffer[(g * 5) / 32 * 4].
 * 32 consecutive groups (128 values) fill exactly 5 such blocks.
 *
 * buffer: destination; bits outside the written range are preserved.
 * offset: index of the first element to write.
 * in:     source values; they are shifted in without masking, so each one is
 *         expected to already fit in 5 bits.
 * count:  number of values to pack.
 *
 * The leading values needed to reach a multiple-of-4 element index and the
 * trailing `count % 4` values are delegated to __PackedArray_pack_scalar.
 *
 * The group bit position is computed in 64 bits: `offset / 4 * 5` wraps in
 * uint32_t once offset / 4 exceeds UINT32_MAX / 5.
 */
void __PackedArray_pack_5(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t pre, post;
  uint32_t* __restrict out;
  const uint32_t* __restrict end;
  uint32_t startBit;
  uint64_t groupBit;
  __m128i packed, in_4, mask;
  uint32_t offset_4;

  /* Scalar prologue: advance to the next element index that is a multiple of 4. */
  pre = (offset + 3) / 4 * 4 - offset;
  pre = pre > count ? count : pre;
  if (pre > 0)
  {
    __PackedArray_pack_scalar(buffer, 5, (uint32_t)((1ULL << 5) - 1), offset, in, pre);
    offset += pre;
    in += pre;
    count -= pre;
  }

  /* Values that do not form a complete group of 4 are left for the scalar epilogue. */
  post = count % 4;
  count -= post;

  if (count > 0)
  {
    /* Per-lane bit position of the first group, widened to avoid 32-bit wraparound. */
    groupBit = (uint64_t)(offset / 4) * 5;
    out = &buffer[groupBit / 32 * 4];
    startBit = (uint32_t)(groupBit % 32);

    /* Keep the bits already stored below startBit in the first block. */
    packed = _mm_loadu_si128((const __m128i*)out);
    mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1));
    packed = _mm_and_si128(packed, mask);

    offset_4 = offset % 128;
    offset += count;

    if (count >= 128 - offset_4)
    {
      int32_t n;

      /* n = number of passes that run through to the end of a 128-value cycle. */
      n = (count + offset_4) / 128;
      count -= 128 * n - offset_4;

      /*
       * The switch jumps into the middle of the unrolled loop body so the first
       * pass starts at the current group; every case intentionally falls through.
       */
      switch (offset_4 / 4)
      {
        do
        {
          case 0:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 5) % 32)));
            in += 4;
          case 1:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 5) % 32)));
            in += 4;
          case 2:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 5) % 32)));
            in += 4;
          case 3:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 5) % 32)));
            in += 4;
          case 4:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 5) % 32)));
            in += 4;
          case 5:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 5) % 32)));
            in += 4;
          case 6:
            /* Group straddles a block boundary: flush, then carry the high bits over. */
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 5) % 32)));
            _mm_storeu_si128((__m128i*)out, packed);
            out += 4;
            packed = _mm_srli_epi32(in_4, (32 - ((6 * 5) % 32)));
            in += 4;
          case 7:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 5) % 32)));
            in += 4;
          case 8:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 5) % 32)));
            in += 4;
          case 9:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 5) % 32)));
            in += 4;
          case 10:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 5) % 32)));
            in += 4;
          case 11:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 5) % 32)));
            in += 4;
          case 12:
            /* Group straddles a block boundary: flush, then carry the high bits over. */
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 5) % 32)));
            _mm_storeu_si128((__m128i*)out, packed);
            out += 4;
            packed = _mm_srli_epi32(in_4, (32 - ((12 * 5) % 32)));
            in += 4;
          case 13:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 5) % 32)));
            in += 4;
          case 14:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 5) % 32)));
            in += 4;
          case 15:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 5) % 32)));
            in += 4;
          case 16:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 5) % 32)));
            in += 4;
          case 17:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 5) % 32)));
            in += 4;
          case 18:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 5) % 32)));
            in += 4;
          case 19:
            /* Group straddles a block boundary: flush, then carry the high bits over. */
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 5) % 32)));
            _mm_storeu_si128((__m128i*)out, packed);
            out += 4;
            packed = _mm_srli_epi32(in_4, (32 - ((19 * 5) % 32)));
            in += 4;
          case 20:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 5) % 32)));
            in += 4;
          case 21:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 5) % 32)));
            in += 4;
          case 22:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 5) % 32)));
            in += 4;
          case 23:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 5) % 32)));
            in += 4;
          case 24:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 5) % 32)));
            in += 4;
          case 25:
            /* Group straddles a block boundary: flush, then carry the high bits over. */
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 5) % 32)));
            _mm_storeu_si128((__m128i*)out, packed);
            out += 4;
            packed = _mm_srli_epi32(in_4, (32 - ((25 * 5) % 32)));
            in += 4;
          case 26:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 5) % 32)));
            in += 4;
          case 27:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 5) % 32)));
            in += 4;
          case 28:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 5) % 32)));
            in += 4;
          case 29:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 5) % 32)));
            in += 4;
          case 30:
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 5) % 32)));
            in += 4;
          case 31:
            /* Last group of the cycle ends exactly on a block boundary: flush and restart empty. */
            in_4 = _mm_loadu_si128((const __m128i*)in);
            packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 5) % 32)));
            in += 4;
            _mm_storeu_si128((__m128i*)out, packed);
            out += 4;
            packed = _mm_setzero_si128();
        } while (--n > 0);
      }

      if (count == 0)
        goto __PackedArray_pack_5_post;

      /* The remainder starts on a fresh cycle: group 0, bit 0. */
      offset_4 = 0;
      startBit = 0;
    }

    /*
     * Remainder (fewer groups than are left in the current cycle): same unrolled
     * steps, but each one stops as soon as the input is exhausted. Cases fall
     * through intentionally when `in != end`.
     */
    end = in + count;
    switch (offset_4 / 4)
    {
      case 0:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 1:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 2:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 3:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 4:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 5:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 6:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 5) % 32)));
        _mm_storeu_si128((__m128i*)out, packed);
        out += 4;
        packed = _mm_srli_epi32(in_4, (32 - ((6 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 7:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 8:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 9:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 10:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 11:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 12:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 5) % 32)));
        _mm_storeu_si128((__m128i*)out, packed);
        out += 4;
        packed = _mm_srli_epi32(in_4, (32 - ((12 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 13:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 14:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 15:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 16:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 17:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 18:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 19:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 5) % 32)));
        _mm_storeu_si128((__m128i*)out, packed);
        out += 4;
        packed = _mm_srli_epi32(in_4, (32 - ((19 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 20:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 21:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 22:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 23:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 24:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 25:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 5) % 32)));
        _mm_storeu_si128((__m128i*)out, packed);
        out += 4;
        packed = _mm_srli_epi32(in_4, (32 - ((25 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 26:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 27:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 28:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 29:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 30:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 5) % 32)));
        in += 4;
        if (in == end) break;
      case 31:
        in_4 = _mm_loadu_si128((const __m128i*)in);
        packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 5) % 32)));
        in += 4;
        _mm_storeu_si128((__m128i*)out, packed);
        out += 4;
        packed = _mm_setzero_si128();
        if (in == end) break;
    }
    assert(in == end);

    /*
     * If the last group did not end on a block boundary, `packed` holds a
     * partially filled block: merge it with the bits already present in the
     * buffer above the written range, then store it.
     */
    if ((count / 4 * 5 + startBit) % 32)
    {
      in_4 = _mm_loadu_si128((const __m128i*)out);
      mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 5 + startBit - 1) % 32) + 1), _mm_set1_epi32(1));
      in_4 = _mm_andnot_si128(mask, in_4);
      packed = _mm_or_si128(packed, in_4);
      _mm_storeu_si128((__m128i*)out, packed);
    }
  }

__PackedArray_pack_5_post:
  /* Scalar epilogue: the trailing count % 4 values; offset was already advanced past the SIMD part. */
  if (post > 0)
    __PackedArray_pack_scalar(buffer, 5, (uint32_t)((1ULL << 5) - 1), offset, in, post);
}
/*
 * Unpacks `count` 5-bit values from `buffer`, starting at element index
 * `offset`, into `out` (one uint32_t per value), using 128-bit SSE2 integer
 * operations.
 *
 * Layout as implemented below: values are processed in groups of 4 (one value
 * per 32-bit lane). Group g = element_index / 4 lives at bit (g * 5) % 32 of
 * every lane of the 4-word block that starts at buffer[(g * 5) / 32 * 4].
 * 32 consecutive groups (128 values) span exactly 5 such blocks.
 *
 * buffer: packed source data.
 * offset: index of the first element to read.
 * out:    destination for the unpacked values.
 * count:  number of values to unpack.
 *
 * The leading values needed to reach a multiple-of-4 element index and the
 * trailing `count % 4` values are delegated to __PackedArray_unpack_scalar.
 *
 * The group bit position is computed in 64 bits: `offset / 4 * 5` wraps in
 * uint32_t once offset / 4 exceeds UINT32_MAX / 5.
 */
void __PackedArray_unpack_5(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t pre, post;
  const uint32_t* __restrict in;
  const uint32_t* __restrict end;
  __m128i packed, out_4;
  uint32_t offset_4;

  /* Scalar prologue: advance to the next element index that is a multiple of 4. */
  pre = (offset + 3) / 4 * 4 - offset;
  pre = pre > count ? count : pre;
  if (pre > 0)
  {
    __PackedArray_unpack_scalar(buffer, 5, (uint32_t)((1ULL << 5) - 1), offset, out, pre);
    offset += pre;
    out += pre;
    count -= pre;
  }

  /* Values that do not form a complete group of 4 are left for the scalar epilogue. */
  post = count % 4;
  count -= post;

  if (count > 0)
  {
    /* Locate the block holding the first group; widened to avoid 32-bit wraparound. */
    in = &buffer[(uint64_t)(offset / 4) * 5 / 32 * 4];
    packed = _mm_loadu_si128((const __m128i*)in);

    offset_4 = offset % 128;
    offset += count;

    if (count >= 128 - offset_4)
    {
      int32_t n;

      /* n = number of passes that run through to the end of a 128-value cycle. */
      n = (count + offset_4) / 128;
      count -= 128 * n - offset_4;

      /*
       * The switch jumps into the middle of the unrolled loop body so the first
       * pass starts at the current group (skipping the block load at the top of
       * the loop); every case intentionally falls through.
       */
      switch (offset_4 / 4)
      {
        do
        {
          /* Subsequent passes: move on to the first block of the next cycle. */
          in += 4;
          packed = _mm_loadu_si128((const __m128i*)in);
          case 0:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 1:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 2:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 3:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 4:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 5:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 6:
          {
            /*
             * Group straddles a block boundary: `low` is what remains at the top
             * of the current block, `high` is the next block shifted up, and
             * `mask` keeps only the field bits that come from the next block.
             */
            __m128i low, high, mask;
            low = _mm_srli_epi32(packed, ((6 * 5) % 32));
            in += 4;
            packed = _mm_loadu_si128((const __m128i*)in);
            high = _mm_slli_epi32(packed, (32 - ((6 * 5) % 32)));
            mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((6 * 5) % 32))), (32 - ((6 * 5) % 32)));
            out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          }
          case 7:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 8:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 9:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 10:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 11:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 12:
          {
            /* Straddling group: combine the tail of this block with the head of the next. */
            __m128i low, high, mask;
            low = _mm_srli_epi32(packed, ((12 * 5) % 32));
            in += 4;
            packed = _mm_loadu_si128((const __m128i*)in);
            high = _mm_slli_epi32(packed, (32 - ((12 * 5) % 32)));
            mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((12 * 5) % 32))), (32 - ((12 * 5) % 32)));
            out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          }
          case 13:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 14:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 15:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 16:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 17:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 18:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 19:
          {
            /* Straddling group: combine the tail of this block with the head of the next. */
            __m128i low, high, mask;
            low = _mm_srli_epi32(packed, ((19 * 5) % 32));
            in += 4;
            packed = _mm_loadu_si128((const __m128i*)in);
            high = _mm_slli_epi32(packed, (32 - ((19 * 5) % 32)));
            mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((19 * 5) % 32))), (32 - ((19 * 5) % 32)));
            out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          }
          case 20:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 21:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 22:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 23:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 24:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 25:
          {
            /* Straddling group: combine the tail of this block with the head of the next. */
            __m128i low, high, mask;
            low = _mm_srli_epi32(packed, ((25 * 5) % 32));
            in += 4;
            packed = _mm_loadu_si128((const __m128i*)in);
            high = _mm_slli_epi32(packed, (32 - ((25 * 5) % 32)));
            mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((25 * 5) % 32))), (32 - ((25 * 5) % 32)));
            out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          }
          case 26:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 27:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 28:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 29:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 30:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
          case 31:
            out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
            _mm_storeu_si128((__m128i*)out, out_4);
            out += 4;
        } while (--n > 0);
      }

      if (count == 0)
        goto __PackedArray_unpack_5_post;

      /* The remainder starts on a fresh cycle: load its first block, group 0. */
      in += 4;
      packed = _mm_loadu_si128((const __m128i*)in);
      offset_4 = 0;
    }

    /*
     * Remainder (fewer groups than are left in the current cycle): same unrolled
     * steps, but each one stops as soon as the output is complete. Cases fall
     * through intentionally when `out != end`.
     */
    end = out + count;
    switch (offset_4 / 4)
    {
      case 0:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 1:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 2:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 3:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 4:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 5:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 6:
      {
        /* Straddling group: combine the tail of this block with the head of the next. */
        __m128i low, high, mask;
        low = _mm_srli_epi32(packed, ((6 * 5) % 32));
        in += 4;
        packed = _mm_loadu_si128((const __m128i*)in);
        high = _mm_slli_epi32(packed, (32 - ((6 * 5) % 32)));
        mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((6 * 5) % 32))), (32 - ((6 * 5) % 32)));
        out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
      }
        if (out == end) break;
      case 7:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 8:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 9:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 10:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 11:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 12:
      {
        /* Straddling group: combine the tail of this block with the head of the next. */
        __m128i low, high, mask;
        low = _mm_srli_epi32(packed, ((12 * 5) % 32));
        in += 4;
        packed = _mm_loadu_si128((const __m128i*)in);
        high = _mm_slli_epi32(packed, (32 - ((12 * 5) % 32)));
        mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((12 * 5) % 32))), (32 - ((12 * 5) % 32)));
        out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
      }
        if (out == end) break;
      case 13:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 14:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 15:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 16:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 17:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 18:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 19:
      {
        /* Straddling group: combine the tail of this block with the head of the next. */
        __m128i low, high, mask;
        low = _mm_srli_epi32(packed, ((19 * 5) % 32));
        in += 4;
        packed = _mm_loadu_si128((const __m128i*)in);
        high = _mm_slli_epi32(packed, (32 - ((19 * 5) % 32)));
        mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((19 * 5) % 32))), (32 - ((19 * 5) % 32)));
        out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
      }
        if (out == end) break;
      case 20:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 21:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 22:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 23:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 24:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 25:
      {
        /* Straddling group: combine the tail of this block with the head of the next. */
        __m128i low, high, mask;
        low = _mm_srli_epi32(packed, ((25 * 5) % 32));
        in += 4;
        packed = _mm_loadu_si128((const __m128i*)in);
        high = _mm_slli_epi32(packed, (32 - ((25 * 5) % 32)));
        mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 5) - 1)), (32 - ((25 * 5) % 32))), (32 - ((25 * 5) % 32)));
        out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
      }
        if (out == end) break;
      case 26:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 27:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 28:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 29:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 30:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
      case 31:
        out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 5) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 5) - 1)));
        _mm_storeu_si128((__m128i*)out, out_4);
        out += 4;
        if (out == end) break;
    }
    assert(out == end);
  }

__PackedArray_unpack_5_post:
  /* Scalar epilogue: the trailing count % 4 values; offset was already advanced past the SIMD part. */
  if (post > 0)
    __PackedArray_unpack_scalar(buffer, 5, (uint32_t)((1ULL << 5) - 1), offset, out, post);
}
/*
 * Packs `count` values read from `in` into `buffer` at 6 bits per value,
 * starting at element index `offset`.
 *
 * The work is split in three parts:
 *   - `pre`:  leading elements up to the next multiple of four, handed to
 *             __PackedArray_pack_scalar (defined elsewhere in this file);
 *   - middle: groups of four elements processed with 128-bit integer
 *             intrinsics; the four inputs of a group go to the four 32-bit
 *             lanes of one vector and are shifted to the same bit position;
 *   - `post`: trailing `count % 4` elements, again handed to the scalar helper.
 *
 * The vector path ORs the inputs in without masking them.
 * NOTE(review): presumably callers guarantee every value fits in 6 bits - confirm.
 */
void __PackedArray_pack_6(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
/* Number of elements needed to round offset up to a multiple of four, clamped to count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 6, (uint32_t)((1ULL << 6) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
/* Elements that do not fill a whole group of four are deferred to the scalar call at the end. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* Locate the group of four words holding the first vectorised element, and the bit position inside each word. */
out = &buffer[(offset / 4 * 6) / 32 * 4]; | |
startBit = (offset / 4 * 6) % 32; | |
/* Keep only the bits below startBit in every lane; the bits above are rebuilt from the input. */
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
/* Position inside a 128-element period (32 groups of four). */
offset_4 = offset % 128; | |
/* offset is advanced here because the scalar post step below starts from it. */
offset += count; | |
/* Bulk path: there are enough elements to reach the next 128-element boundary. */
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
/* n = number of passes through the unrolled body; count becomes what is left after them. */
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the do/while body at the current group; every case falls through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 6) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 6) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 6) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 6) % 32))); | |
in += 4; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 6) % 32))); | |
in += 4; | |
/* Cases 5, 10, 21 and 26: the value crosses a 32-bit boundary. The full word is stored and the bits of in_4 that did not fit start the next word. */
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((5 * 6) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 6) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 6) % 32))); | |
in += 4; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 6) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 6) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((10 * 6) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 6) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 6) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 6) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 6) % 32))); | |
in += 4; | |
/* Cases 15 and 31: the value ends exactly on a 32-bit boundary, so the word is stored and the next one starts from zero. */
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 6) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 6) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 6) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 6) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 6) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 6) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((21 * 6) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 6) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 6) % 32))); | |
in += 4; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 6) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 6) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((26 * 6) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 6) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 6) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 6) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 6) % 32))); | |
in += 4; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 6) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
/* No whole groups left for the vector tail: skip to the scalar post step. */
if (count == 0) | |
goto __PackedArray_pack_6_post; | |
/* The remaining groups begin at the start of a fresh 128-element period. */
offset_4 = 0; | |
startBit = 0; | |
} | |
/* Tail: same per-group steps as the unrolled body above, but each case stops as soon as in reaches end. */
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((5 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((10 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 6) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((21 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 6) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((26 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 6) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 6) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
assert(in == end); | |
/* The last word of each lane is only partly filled: keep the bits already in the buffer above the written range and merge them with packed. */
if ((count / 4 * 6 + startBit) % 32) | |
{ | |
in_4 = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 6 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
__PackedArray_pack_6_post: | |
/* Leftover elements (fewer than four) go through the scalar helper, starting at the already-advanced offset. */
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 6, (uint32_t)((1ULL << 6) - 1), offset, in, post); | |
} | |
/*
 * Unpacks `count` values of 6 bits each from `buffer`, starting at element
 * index `offset`, and writes them to `out` as 32-bit integers.
 *
 * The work is split in three parts:
 *   - `pre`:  leading elements up to the next multiple of four, handed to
 *             __PackedArray_unpack_scalar (defined elsewhere in this file);
 *   - middle: groups of four elements extracted with 128-bit integer
 *             intrinsics; each 32-bit lane of the loaded vector yields one
 *             element of the group via a shift and a 6-bit mask;
 *   - `post`: trailing `count % 4` elements, again handed to the scalar helper.
 */
void __PackedArray_unpack_6(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
/* Number of elements needed to round offset up to a multiple of four, clamped to count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 6, (uint32_t)((1ULL << 6) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
/* Elements that do not fill a whole group of four are deferred to the scalar call at the end. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* Locate and load the group of four words holding the first vectorised element. */
in = &buffer[(offset / 4 * 6) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
/* Position inside a 128-element period (32 groups of four). */
offset_4 = offset % 128; | |
/* offset is advanced here because the scalar post step below starts from it. */
offset += count; | |
/* Bulk path: there are enough elements to reach the next 128-element boundary. */
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
/* n = number of passes through the unrolled body; count becomes what is left after them. */
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the do/while body at the current group; every case falls through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
/* Reached only when the loop repeats (the switch jumps directly to a case label): advance to the next group of four words. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Cases 5, 10, 21 and 26: the value crosses a 32-bit boundary. Its low bits come from the current word and its high bits from the next word, which is loaded here. */
case 5: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((5 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((5 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((5 * 6) % 32))), (32 - ((5 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 10: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((10 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((10 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((10 * 6) % 32))), (32 - ((10 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Case 15: the value ends exactly on a 32-bit boundary, so the next group of words is loaded for case 16. */
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 21: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((21 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((21 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((21 * 6) % 32))), (32 - ((21 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((26 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((26 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((26 * 6) % 32))), (32 - ((26 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
/* No whole groups left for the vector tail: skip to the scalar post step without loading another group. */
if (count == 0) | |
goto __PackedArray_unpack_6_post; | |
/* The remaining groups begin at the start of a fresh 128-element period, in the next group of words. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
/* Tail: same per-group steps as the unrolled body above, but each case stops as soon as out reaches end. */
end = out + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((4 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 5: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((5 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((5 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((5 * 6) % 32))), (32 - ((5 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((9 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 10: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((10 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((10 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((10 * 6) % 32))), (32 - ((10 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((13 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
/* Case 15: unlike the unrolled body, the end check comes before the load of the next group, so that load is skipped when this was the last group. */
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((18 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 21: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((21 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((21 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((21 * 6) % 32))), (32 - ((21 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 22: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((22 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((26 * 6) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((26 * 6) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 6) - 1)), (32 - ((26 * 6) % 32))), (32 - ((26 * 6) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 27: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((27 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 6) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 6) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
__PackedArray_unpack_6_post: | |
/* Leftover elements (fewer than four) go through the scalar helper, starting at the already-advanced offset. */
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 6, (uint32_t)((1ULL << 6) - 1), offset, out, post); | |
} | |
/*
 * Packs `count` values read from `in` into `buffer`, 7 bits per value,
 * starting at item index `offset`.
 *
 * The work is split in three phases:
 *   - pre:  up to 3 leading items, until `offset` is a multiple of 4, are
 *           handed to __PackedArray_pack_scalar;
 *   - SIMD: items are consumed in groups of 4. Each group is loaded as one
 *           128-bit vector and every 32-bit lane is shifted and OR-ed into
 *           the matching lane of `packed`, so lane j accumulates in[j],
 *           in[j + 4], in[j + 8], ...;
 *   - post: the trailing `count % 4` items are handed to
 *           __PackedArray_pack_scalar.
 *
 * 32 groups (128 items) produce exactly seven 128-bit stores (cases 4, 9,
 * 13, 18, 22, 27 and 31), after which the shift pattern repeats.
 *
 * The SIMD path ORs the input values in without masking them to 7 bits.
 * NOTE(review): presumably callers guarantee every value fits in 7 bits -
 * confirm against the public API.
 */
void __PackedArray_pack_7(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
/* Items needed to bring offset up to the next multiple of 4, capped at count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 7, (uint32_t)((1ULL << 7) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
/* Leftover items that do not fill a group of 4; they are packed after the label at the bottom. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* 128-bit word that receives group offset / 4, and the bit position of that group inside each 32-bit lane. */
out = &buffer[(offset / 4 * 7) / 32 * 4]; | |
startBit = (offset / 4 * 7) % 32; | |
/* Keep the bits already stored below startBit in the first word; the bits above are rebuilt below. */
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
/* Position inside the current block of 128 items; offset_4 / 4 selects the entry case of the switches. */
offset_4 = offset % 128; | |
/* Advance offset past all SIMD items now (count is still the full SIMD count), so the post phase starts at the right item. */
offset += count; | |
/* Enough items to reach the end of the current 128-item block: run the unrolled loop. */
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
/* n = number of 128-item block ends reached; count becomes (count + offset_4) % 128, the items left for the tail. */
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the do/while body (Duff's-device style); every case falls through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 7) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 7) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 7) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 7) % 32))); | |
in += 4; | |
/* Shift 28: the 7-bit value straddles two words. Store the completed word, then start the next */
/* one with the bits that did not fit. Cases 9, 13, 18, 22 and 27 follow the same pattern. */
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((4 * 7) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 7) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 7) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 7) % 32))); | |
in += 4; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 7) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((9 * 7) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 7) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 7) % 32))); | |
in += 4; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 7) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((13 * 7) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 7) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 7) % 32))); | |
in += 4; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 7) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 7) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((18 * 7) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 7) % 32))); | |
in += 4; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 7) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 7) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((22 * 7) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 7) % 32))); | |
in += 4; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 7) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 7) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 7) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((27 * 7) % 32))); | |
in += 4; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 7) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 7) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 7) % 32))); | |
in += 4; | |
/* Shift 25: the value ends exactly on the word boundary, so the word is stored and the next one starts empty. */
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 7) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
/* Nothing left after the last complete block: skip the tail and the final merge. */
if (count == 0) | |
goto __PackedArray_pack_7_post; | |
/* The tail starts at the beginning of a block, on an empty word. */
offset_4 = 0; | |
startBit = 0; | |
} | |
end = in + count; | |
/* Same steps as the unrolled loop, but leave the switch as soon as the last group has been consumed. */
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((4 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((9 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((13 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((18 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((22 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 7) % 32))); | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_srli_epi32(in_4, (32 - ((27 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 7) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 7) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
} | |
assert(in == end); | |
/* The last word is only partially filled: merge `packed` with the bits the buffer already holds above the last written bit. */
if ((count / 4 * 7 + startBit) % 32) | |
{ | |
in_4 = _mm_loadu_si128((const __m128i*)out); | |
/* mask covers the low bits of each lane that `packed` owns; andnot keeps only the buffer bits above them. */
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), ((count / 4 * 7 + startBit - 1) % 32) + 1), _mm_set1_epi32(1)); | |
in_4 = _mm_andnot_si128(mask, in_4); | |
packed = _mm_or_si128(packed, in_4); | |
_mm_storeu_si128((__m128i*)out, packed); | |
} | |
} | |
__PackedArray_pack_7_post: | |
/* offset and in were advanced past the SIMD items above, so the scalar helper continues where they stopped. */
if (post > 0) | |
__PackedArray_pack_scalar(buffer, 7, (uint32_t)((1ULL << 7) - 1), offset, in, post); | |
} | |
/*
 * Unpacks `count` 7-bit values from `buffer`, starting at item index
 * `offset`, into `out` (one uint32_t per value).
 *
 * The work is split in three phases:
 *   - pre:  up to 3 leading items, until `offset` is a multiple of 4, are
 *           handed to __PackedArray_unpack_scalar;
 *   - SIMD: items are produced in groups of 4. `packed` holds the current
 *           128-bit word; each group is obtained by shifting every 32-bit
 *           lane right and masking it with (1 << 7) - 1, then stored to
 *           `out` as 4 consecutive uint32_t;
 *   - post: the trailing `count % 4` items are handed to
 *           __PackedArray_unpack_scalar.
 *
 * Groups 4, 9, 13, 18, 22 and 27 of each 128-item block straddle two
 * 128-bit words and are rebuilt from both of them.
 */
void __PackedArray_unpack_7(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) | |
{ | |
uint32_t pre, post; | |
const uint32_t* __restrict in; | |
const uint32_t* __restrict end; | |
__m128i packed, out_4; | |
uint32_t offset_4; | |
/* Items needed to bring offset up to the next multiple of 4, capped at count. */
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_unpack_scalar(buffer, 7, (uint32_t)((1ULL << 7) - 1), offset, out, pre); | |
offset += pre; | |
out += pre; | |
count -= pre; | |
} | |
/* Leftover items that do not fill a group of 4; they are unpacked after the label at the bottom. */
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
/* 128-bit word that holds group offset / 4. */
in = &buffer[(offset / 4 * 7) / 32 * 4]; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
/* Position inside the current block of 128 items; offset_4 / 4 selects the entry case of the switches. */
offset_4 = offset % 128; | |
/* Advance offset past all SIMD items now (count is still the full SIMD count), so the post phase starts at the right item. */
offset += count; | |
/* Enough items to reach the end of the current 128-item block: run the unrolled loop. */
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
/* n = number of 128-item block ends reached; count becomes (count + offset_4) % 128, the items left for the tail. */
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
/* The switch jumps into the middle of the do/while body (Duff's-device style); every case falls through on purpose. */
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
/* Only reached when the loop repeats, never on the initial switch jump: move on to the next 128-bit word. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
/* Shift 28: the value straddles two words. `low` is what is left of it in the current word, `high` */
/* lines up the next word so that its lowest bits continue the value, and `mask` keeps only the bits */
/* of the 7-bit value that come from the next word. Cases 9, 13, 18, 22 and 27 follow the same pattern. */
case 4: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((4 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((4 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((4 * 7) % 32))), (32 - ((4 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 9: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((9 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((9 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((9 * 7) % 32))), (32 - ((9 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 13: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((13 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((13 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((13 * 7) % 32))), (32 - ((13 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 18: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((18 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((18 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((18 * 7) % 32))), (32 - ((18 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 22: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((22 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((22 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((22 * 7) % 32))), (32 - ((22 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 27: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((27 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((27 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((27 * 7) % 32))), (32 - ((27 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} while (--n > 0); | |
} | |
/* Nothing left after the last complete block: skip the tail. */
if (count == 0) | |
goto __PackedArray_unpack_7_post; | |
/* The tail starts at the beginning of a block, on the next 128-bit word. */
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
offset_4 = 0; | |
} | |
end = out + count; | |
/* Same steps as the unrolled loop, but leave the switch as soon as the last group has been written. */
switch (offset_4 / 4) | |
{ | |
case 0: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((0 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 1: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((1 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 2: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((2 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 3: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((3 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 4: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((4 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((4 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((4 * 7) % 32))), (32 - ((4 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 5: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((5 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 6: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((6 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 7: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((7 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 8: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((8 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 9: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((9 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((9 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((9 * 7) % 32))), (32 - ((9 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 10: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((10 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 11: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((11 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 12: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((12 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 13: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((13 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((13 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((13 * 7) % 32))), (32 - ((13 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 14: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((14 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 15: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((15 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 16: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((16 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 17: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((17 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 18: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((18 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((18 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((18 * 7) % 32))), (32 - ((18 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 19: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((19 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 20: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((20 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 21: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((21 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 22: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((22 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((22 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((22 * 7) % 32))), (32 - ((22 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 23: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((23 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 24: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((24 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 25: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((25 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 26: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((26 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 27: | |
{ | |
__m128i low, high, mask; | |
low = _mm_srli_epi32(packed, ((27 * 7) % 32)); | |
in += 4; | |
packed = _mm_loadu_si128((const __m128i*)in); | |
high = _mm_slli_epi32(packed, (32 - ((27 * 7) % 32))); | |
mask = _mm_slli_epi32(_mm_srli_epi32(_mm_set1_epi32((uint32_t)((1ULL << 7) - 1)), (32 - ((27 * 7) % 32))), (32 - ((27 * 7) % 32))); | |
out_4 = _mm_or_si128(low, _mm_and_si128(_mm_or_si128(low, high), mask)); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
} | |
if (out == end) break; | |
case 28: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((28 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 29: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((29 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 30: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((30 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
case 31: | |
out_4 = _mm_and_si128(_mm_srli_epi32(packed, ((31 * 7) % 32)), _mm_set1_epi32((uint32_t)((1ULL << 7) - 1))); | |
_mm_storeu_si128((__m128i*)out, out_4); | |
out += 4; | |
if (out == end) break; | |
} | |
assert(out == end); | |
} | |
__PackedArray_unpack_7_post: | |
/* offset and out were advanced past the SIMD items above, so the scalar helper continues where they stopped. */
if (post > 0) | |
__PackedArray_unpack_scalar(buffer, 7, (uint32_t)((1ULL << 7) - 1), offset, out, post); | |
} | |
void __PackedArray_pack_8(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) | |
{ | |
uint32_t pre, post; | |
uint32_t* __restrict out; | |
const uint32_t* __restrict end; | |
uint32_t startBit; | |
__m128i packed, in_4, mask; | |
uint32_t offset_4; | |
pre = (offset + 3) / 4 * 4 - offset; | |
pre = pre > count ? count : pre; | |
if (pre > 0) | |
{ | |
__PackedArray_pack_scalar(buffer, 8, (uint32_t)((1ULL << 8) - 1), offset, in, pre); | |
offset += pre; | |
in += pre; | |
count -= pre; | |
} | |
post = count % 4; | |
count -= post; | |
if (count > 0) | |
{ | |
out = &buffer[(offset / 4 * 8) / 32 * 4]; | |
startBit = (offset / 4 * 8) % 32; | |
packed = _mm_loadu_si128((const __m128i*)out); | |
mask = _mm_sub_epi32(_mm_slli_epi32(_mm_set1_epi32(1), startBit), _mm_set1_epi32(1)); | |
packed = _mm_and_si128(packed, mask); | |
offset_4 = offset % 128; | |
offset += count; | |
if (count >= 128 - offset_4) | |
{ | |
int32_t n; | |
n = (count + offset_4) / 128; | |
count -= 128 * n - offset_4; | |
switch (offset_4 / 4) | |
{ | |
do | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 8) % 32))); | |
in += 4; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 8) % 32))); | |
in += 4; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 8) % 32))); | |
in += 4; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 8) % 32))); | |
in += 4; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 8) % 32))); | |
in += 4; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 8) % 32))); | |
in += 4; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 8) % 32))); | |
in += 4; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 8) % 32))); | |
in += 4; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 8) % 32))); | |
in += 4; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((12 * 8) % 32))); | |
in += 4; | |
case 13: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((13 * 8) % 32))); | |
in += 4; | |
case 14: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((14 * 8) % 32))); | |
in += 4; | |
case 15: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((15 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 16: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((16 * 8) % 32))); | |
in += 4; | |
case 17: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((17 * 8) % 32))); | |
in += 4; | |
case 18: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((18 * 8) % 32))); | |
in += 4; | |
case 19: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((19 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 20: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((20 * 8) % 32))); | |
in += 4; | |
case 21: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((21 * 8) % 32))); | |
in += 4; | |
case 22: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((22 * 8) % 32))); | |
in += 4; | |
case 23: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((23 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 24: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((24 * 8) % 32))); | |
in += 4; | |
case 25: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((25 * 8) % 32))); | |
in += 4; | |
case 26: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((26 * 8) % 32))); | |
in += 4; | |
case 27: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((27 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
case 28: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((28 * 8) % 32))); | |
in += 4; | |
case 29: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((29 * 8) % 32))); | |
in += 4; | |
case 30: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((30 * 8) % 32))); | |
in += 4; | |
case 31: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((31 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
} while (--n > 0); | |
} | |
if (count == 0) | |
goto __PackedArray_pack_8_post; | |
offset_4 = 0; | |
startBit = 0; | |
} | |
end = in + count; | |
switch (offset_4 / 4) | |
{ | |
case 0: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((0 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 1: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((1 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 2: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((2 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 3: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((3 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 4: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((4 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 5: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((5 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 6: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((6 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 7: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((7 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 8: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((8 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 9: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((9 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 10: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((10 * 8) % 32))); | |
in += 4; | |
if (in == end) break; | |
case 11: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = _mm_or_si128(packed, _mm_slli_epi32(in_4, ((11 * 8) % 32))); | |
in += 4; | |
_mm_storeu_si128((__m128i*)out, packed); | |
out += 4; | |
packed = _mm_setzero_si128(); | |
if (in == end) break; | |
case 12: | |
in_4 = _mm_loadu_si128((const __m128i*)in); | |
packed = |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
PackedArray.pp.c
generated from https://github.com/gpakosz/PackedArray/blob/master/PackedArray.c
PackedArraySIMD.pp.c
generated from https://github.com/gpakosz/PackedArray/blob/master/PackedArraySIMD.c