Skip to content

Instantly share code, notes, and snippets.

@argv0
Created September 22, 2011 23:42
Show Gist options
  • Save argv0/1236361 to your computer and use it in GitHub Desktop.
Save argv0/1236361 to your computer and use it in GitHub Desktop.
#include <fcntl.h>
#include <unistd.h>
#include <vector>
uint64_t *polynomial_lookup_buf;
struct rab_poly
{
rab_poly()
: start(0), length(0), value(0) {}
rab_poly(uint64_t s, uint64_t l, uint64_t v)
: start(s), length(l), value(v) {}
uint64_t start;
uint64_t length;
uint64_t value;
};
static const std::size_t MAX_BLOCK_SIZE = 4096;
static const std::size_t MIN_BLOCK_SIZE = 1024;
static const std::size_t POLY_REM = 3;
static const std::size_t AVG_BLOCK_SIZE = 2048;
static const std::size_t WINDOW_SIZE = 48;
static const std::size_t MIN_WINDOW_SIZE = 17;
static const std::size_t MAX_WINDOW_SIZE = 63;
static const std::size_t READ_BUF_SIZE = 1048576;
struct rab_window
{
rab_window(std::size_t s)
: size(s),
checksum(0),
data(size),
pos(0) {}
std::size_t size;
uint64_t checksum;
std::vector<char> data;
std::size_t pos;
};
struct rab_reader
{
rab_reader(std::size_t window_size)
: blocks(1),
window(window_size),
bytes_read(0),
current_poly_finished(false),
current_rolling_checksum(0),
cur_block(&(blocks[0])) {}
public:
void read_block(void *buf, std::size_t size)
{
if (current_poly_finished)
{
rab_poly p;
blocks.push_back(p);
cur_block = &blocks.back();
current_poly_finished = false;
}
for (std::size_t i=0;i<size;i++)
{
char cur_byte(reinterpret_cast<char *>(buf)[i]);
char pushed_out(window.data[window.pos]);
window.data[window.pos]=cur_byte;
current_rolling_checksum = (current_rolling_checksum*POLY_REM)+cur_byte;
cur_block->value = (cur_block->value*POLY_REM)+cur_byte;
current_rolling_checksum -= (pushed_out*polynomial_lookup_buf[WINDOW_SIZE]);
++(window.pos);
++bytes_read;
++(cur_block->length);
if (window.pos == window.size)
window.pos = 0;
if ((cur_block->length >= MIN_BLOCK_SIZE &&
(current_rolling_checksum % AVG_BLOCK_SIZE) == POLY_REM) ||
cur_block->length == MAX_BLOCK_SIZE)
{
cur_block->start = bytes_read-cur_block->length;
printf("%u %u\n", cur_block->start, cur_block->length);
rab_poly p;
blocks.push_back(p);
cur_block = &blocks.back();
}
if (i == (size-1))
{
current_poly_finished=true;
}
}
}
std::vector<rab_poly> blocks;
rab_window window;
uint64_t bytes_read;
bool current_poly_finished;
uint64_t current_rolling_checksum;
rab_poly* cur_block;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment