Skip to content

Instantly share code, notes, and snippets.

@UncleBill
Forked from preshing/sort1mb.cpp
Created October 28, 2012 12:51
Show Gist options
  • Save UncleBill/3968519 to your computer and use it in GitHub Desktop.
Save UncleBill/3968519 to your computer and use it in GitHub Desktop.
Sort one million 8-digit numbers in 1MB RAM
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef unsigned int u32;
typedef unsigned long long u64;
//-------------------------------------------------------------------------
// WorkArea
//-------------------------------------------------------------------------
namespace WorkArea
{
static const u32 circularSize = 253250;
u32 circular[circularSize] = { 0 }; // consumes 1013000 bytes
static const u32 stageSize = 7500;
u32 stage[stageSize]; // consumes 30000 bytes
u32* headPtr = circular;
u32 numDeltas = 0;
u32 numStaged = 0;
}
//-------------------------------------------------------------------------
// Lookup table
//-------------------------------------------------------------------------
static const u32 LUTsize = 64;
const u32 LUT[LUTsize] = {
0x00000000, 0x028c1816, 0x0511b322, 0x0790e1aa,
0x0a09b40c, 0x0c7c3a7a, 0x0ee884ff, 0x114ea37c,
0x13aea5a9, 0x16089b18, 0x185c9331, 0x1aaa9d36,
0x1cf2c842, 0x1f352349, 0x2171bd1a, 0x23a8a45d,
0x25d9e796, 0x28059523, 0x2a2bbb3e, 0x2c4c67fc,
0x2e67a94f, 0x307d8d04, 0x328e20c8, 0x34997221,
0x369f8e76, 0x38a0830a, 0x3a9c5cff, 0x3c932955,
0x3e84f4eb, 0x4071cc80, 0x4259bcb1, 0x443cd1fd,
0x461b18c1, 0x47f49d3c, 0x49c96b8d, 0x4b998fb4,
0x4d651594, 0x4f2c08ef, 0x50ee756c, 0x52ac6692,
0x5465e7cc, 0x561b0467, 0x57cbc794, 0x59783c68,
0x5b206dda, 0x5cc466c5, 0x5e6431ec, 0x5fffd9f1,
0x61976960, 0x632aeaa8, 0x64ba681c, 0x6645ebf7,
0x67cd8059, 0x69512f48, 0x6ad102b2, 0x6c4d0468,
0x6dc53e27, 0x6f39b98f, 0x70aa802a, 0x72179b68,
0x738114a3, 0x74e6f51b, 0x764945f9, 0x77a81050
};
static const u32 bit2 = 1u << 30;
//-------------------------------------------------------------------------
// Range
//-------------------------------------------------------------------------
struct Range
{
u32 lo;
u32 range;
Range() { lo = 0; range = ~0u; }
void set(u32 l, u32 h) { lo = l; range = h - l; }
u32 lerp(u64 x) { return (u32) ((range * x) >> 32) + lo; }
};
//-------------------------------------------------------------------------
// BitReader
//-------------------------------------------------------------------------
struct BitReader
{
u32* readPtr;
u32 readBit;
BitReader()
{
readPtr = WorkArea::circular;
readBit = 32;
}
u32 readOneBit()
{
u32 bit = (*readPtr >> --readBit) & 1;
if (readBit == 0)
{
readBit = 32;
if (++readPtr == WorkArea::circular + WorkArea::circularSize)
readPtr = WorkArea::circular;
}
return bit;
}
};
//-------------------------------------------------------------------------
// Decoder
//-------------------------------------------------------------------------
struct Decoder : BitReader
{
Range readRange;
u32 readSeq;
u32 readSeqBits;
Decoder() : BitReader()
{
readSeq = 0;
readSeqBits = 1;
}
u32 decode()
{
for (; readSeqBits < 32; readSeqBits++)
readSeq = (readSeq << 1) | readOneBit();
u32 a = 0;
u32 b = LUTsize;
u32 readRel = readSeq - readRange.lo;
u32 relA = 0;
u32 relB = readRange.range;
while (b > a + 1)
{
u32 mid = (a + b) >> 1;
u32 rel = readRange.lerp(LUT[mid]) - readRange.lo;
if (readRel >= rel)
{
a = mid;
relA = rel;
}
else
{
b = mid;
relB = rel;
}
}
u32 seqA = relA + readRange.lo;
u32 seqB = relB + readRange.lo;
assert(seqA != seqB);
while ((int) (seqA ^ seqB) >= 0)
{
readSeqBits--;
seqA <<= 1;
seqB <<= 1;
}
if ((int) readRange.lo >= 0)
assert((int) seqA >= 0 && (int) seqB < 0);
while ((seqA & bit2) && !(seqB & bit2))
{
readSeqBits--;
seqA <<= 1;
seqB <<= 1;
}
readRange.set(seqA, seqB - 1);
return a;
}
u32 popDelta()
{
u32 value = 0;
for (;;)
{
u32 t = decode();
value += t;
if (t < LUTsize - 1)
return value;
}
}
void reset()
{
readPtr = WorkArea::headPtr;
readBit = 32;
readRange.set(0, ~0u);
readSeq = 0;
readSeqBits = 0;
}
};
Decoder g_Decoder;
//-------------------------------------------------------------------------
// BitWriter
//-------------------------------------------------------------------------
struct BitWriter
{
u32* writePtr;
u32 writeBit;
BitWriter()
{
writePtr = WorkArea::circular;
writeBit = 32;
}
void writeOneBit(u32 bit) // 0 or 1
{
*writePtr |= (1 << --writeBit) & -(int) bit;
if (writeBit == 0)
{
writeBit = 32;
if (++writePtr == WorkArea::circular + WorkArea::circularSize)
writePtr = WorkArea::circular;
if (writePtr == g_Decoder.readPtr)
abort(); // Overflow detection
*writePtr = 0;
}
}
};
//-------------------------------------------------------------------------
// Encoder
//-------------------------------------------------------------------------
struct Encoder : BitWriter
{
Range writeRange;
void addCarry()
{
u32* w = writePtr;
for (u32 b = 2u << (writeBit - 1);; b <<= 1)
{
if (b == 0)
{
b = 1;
if (--w == WorkArea::circular - 1)
w = WorkArea::circular + WorkArea::circularSize - 1;
}
*w ^= b;
if (*w & b)
break;
}
}
void encode(u32 value)
{
u32 seqA = writeRange.lerp(LUT[value]);
u32 seqB = writeRange.lerp(value < LUTsize - 1 ? LUT[value + 1] : 1ull << 32);
assert(seqA != seqB);
if ((int) writeRange.lo < 0 && (int) seqA >= 0)
addCarry();
while ((int) (seqA ^ seqB) >= 0)
{
writeOneBit(seqA >> 31);
seqA <<= 1;
seqB <<= 1;
}
if ((int) writeRange.lo >= 0)
assert((int) seqA >= 0 && (int) seqB < 0);
while ((seqA & bit2) && !(seqB & bit2))
{
writeOneBit(seqA >> 31);
seqA <<= 1;
seqB <<= 1;
}
writeRange.set(seqA, seqB - 1);
}
void pushDelta(u32 value)
{
while (value >= LUTsize - 1)
{
encode(LUTsize - 1);
value -= LUTsize - 1;
}
encode(value);
}
void flush()
{
encode(LUTsize / 2);
for (u32 i = 32; --i > 0;)
writeOneBit((writeRange.lo >> i) & 1);
while (writeBit != 32)
writeOneBit(0);
writeRange.set(0, ~0u);
}
};
Encoder g_Encoder;
//-------------------------------------------------------------------------
// mergeStageToCircular
//-------------------------------------------------------------------------
inline int u32Compare(const u32* a, const u32* b) { return *a - *b; }
void mergeStageToCircular()
{
using namespace WorkArea;
qsort(stage, numStaged, 4, (int (*)(const void*, const void*)) u32Compare);
u32 i = 0;
u32 j = 0;
u32 prev = 0;
u32 iValue = i < numDeltas ? g_Decoder.popDelta() : ~0u;
u32 jValue = j < numStaged ? stage[j] : ~0u;
while (i < numDeltas || j < numStaged)
{
if (iValue < jValue)
{
g_Encoder.pushDelta(iValue - prev);
prev = iValue;
iValue = ++i < numDeltas ? iValue + g_Decoder.popDelta() : ~0u;
}
else
{
g_Encoder.pushDelta(jValue - prev);
prev = jValue;
jValue = ++j < numStaged ? stage[j] : ~0u;
}
}
numDeltas += numStaged;
numStaged = 0;
g_Encoder.flush();
g_Decoder.reset();
headPtr = g_Encoder.writePtr;
}
//-------------------------------------------------------------------------
// main
//-------------------------------------------------------------------------
int main(int argc, char* argv[])
{
// Read input
for (;;)
{
int value;
if (scanf("%d", &value) != 1)
break;
if (WorkArea::numStaged >= WorkArea::stageSize)
mergeStageToCircular();
WorkArea::stage[WorkArea::numStaged++] = value;
}
if (WorkArea::numStaged > 0)
mergeStageToCircular();
// Write output
u32 value = 0;
for (u32 i = 0; i < WorkArea::numDeltas; i++)
{
value += g_Decoder.popDelta();
printf("%08d\n", value);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment