Last active
May 29, 2017 16:32
-
-
Save drydenp/23f3893c59e685134f5bfba81165a24a to your computer and use it in GitHub Desktop.
Temporary addition to a fleeting experience
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "sparsepack.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <inttypes.h> | |
struct context { | |
char *cur_buf; | |
struct chunk_h header; | |
int buf_read; // buf_read means data read | |
int zero_read; // zero read means empty blocks read | |
int block_size; | |
}; | |
/*
 * Counters describing what the packer has emitted so far.
 * The two total_* fields are derived values; fixup() fills them in.
 */
struct statistics {
    off_t zero_chunks_written;   /* number of zero chunks emitted */
    off_t data_chunks_written;   /* number of data chunks emitted */
    off_t total_chunks_written;  /* zero + data chunks */
    off_t zero_blocks_written;   /* blocks represented by zero chunks */
    off_t data_blocks_written;   /* blocks carried by data chunks */
    off_t total_blocks_written;  /* zero + data blocks */
};
void fixup(struct statistics *s) { | |
s->total_chunks_written = s->zero_chunks_written + s->data_chunks_written; | |
s->total_blocks_written = s->zero_blocks_written + s->data_blocks_written; | |
} | |
/* Block size in bytes used when reading the input; starts at the compiled-in default. */
int block_size = DEF_BLOCK_SIZE;
/*
 * Report whether a block contains at least one non-zero byte.
 *
 * chunk: start of the block to inspect.
 * size:  number of bytes to inspect; a size of zero or less inspects nothing.
 *
 * Returns true when any of the `size` bytes is non-zero, false otherwise.
 *
 * Every byte is examined, including the final ones: the earlier word-wise
 * loop stopped one word short and only terminated when `size` was a
 * positive multiple of sizeof(int).
 */
bool has_data(char* chunk, int size) {
    for (int i = 0; i < size; i++) {
        if (chunk[i] != 0) {
            return true;
        }
    }
    return false;
}
void write_zero_header(struct context *ctx, struct statistics *stats) { | |
ctx->header.type = unused; | |
ctx->header.size = ctx->zero_read; | |
fprintf(stderr, "Writing zero chunk of %d blocks\n", ctx->zero_read); | |
fwrite(&ctx->header, sizeof(ctx->header), 1, stdout); | |
stats->zero_chunks_written++; | |
stats->zero_blocks_written += ctx->zero_read; | |
ctx->zero_read = 0; | |
} | |
void write_data_header(struct context *ctx, char *full_buf, struct statistics *stats) { | |
ctx->header.type = used; | |
ctx->header.size = ctx->buf_read; | |
fprintf(stderr, "Writing data chunk of %d blocks\n", ctx->buf_read); | |
fwrite(&ctx->header, sizeof(ctx->header), 1, stdout); | |
fwrite(full_buf, ctx->block_size, ctx->buf_read, stdout); | |
stats->data_chunks_written++; | |
stats->data_blocks_written += ctx->buf_read; | |
ctx->buf_read = 0; | |
ctx->cur_buf = full_buf; | |
} | |
/* Packer-wide counters; every field starts at zero. */
struct statistics stats = {0};
int main(char args[]) { | |
char *buf = malloc(block_size * MAX_BLOCK_COUNT); | |
struct context ctx = {0}; | |
int latest; | |
ctx.cur_buf = buf; | |
ctx.block_size = block_size; | |
// so what are our cases? | |
// 1. data has been read and we read a zero block. | |
// 2. data has been read and we read a data block. | |
// 3. data has not been read and we read a zero block. | |
// 4. data has not been read and we read a data block. | |
// this code will fail to read a partial block at the end. | |
fwrite(&our_header, sizeof(struct spaf_h), 1, stdout); | |
while (latest = fread(ctx.cur_buf, block_size, 1, stdin) == 1) { | |
// 1. scan the chunk for data | |
if (has_data(ctx.cur_buf, block_size)) { | |
// case 1: we read data but we had zeroes before. | |
if (ctx.zero_read > 0) { | |
// we write out a zero header: | |
write_zero_header(&ctx, &stats); | |
ctx.buf_read = 1; | |
ctx.cur_buf += block_size; | |
} else { | |
// case 2: we may or may not have read data before. | |
ctx.buf_read++; | |
if (ctx.buf_read == MAX_BLOCK_COUNT) { | |
// write out chunk | |
write_data_header(&ctx, buf, &stats); | |
} else { | |
ctx.cur_buf += block_size; | |
} | |
} | |
} else { | |
// case 4: we read zero but we have existing data | |
if (ctx.buf_read > 0) { | |
write_data_header(&ctx, buf, &stats); | |
ctx.zero_read = 1; | |
} else { | |
// case 3: we may or may not have read zero before: | |
ctx.zero_read++; | |
// check whether we are exceeding the maximum chunk size. | |
// for 4k blocks this is 64k * 4k = 256MB. | |
// that's not a lot... | |
if (ctx.zero_read == (1 << (sizeof(unsigned short) * 8)) - 1) { | |
write_zero_header(&ctx, &stats); | |
} | |
} | |
} | |
} | |
if (ctx.zero_read > 0) { | |
write_zero_header(&ctx, &stats); | |
} else { | |
write_data_header(&ctx, buf, &stats); | |
} | |
fixup(&stats); | |
fprintf(stderr, "Bytes read %" PRIu64 ", data chunks written %d totalling %" PRIu64 " blocks and %" PRIu64 " bytes. Zero chunks written %d totalling %" PRIu64 " blocks and %" PRIu64 " bytes. Total blocks written %" PRIu64 " and total chunks %d.\n", stats.total_blocks_written * block_size, stats.data_chunks_written, stats.data_blocks_written, stats.data_blocks_written * block_size, stats.zero_chunks_written, stats.zero_blocks_written, stats.zero_blocks_written * block_size, stats.total_blocks_written, stats.total_chunks_written); | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Identification strings stored at the start of every SPAF stream. */
#define MAGIC "SPAF"
#define VERSION "10"
/* Values for the `bool` typedef declared below. */
#define false 0
#define true 1
/* Number of blocks the packer and unpacker buffers hold. */
#define MAX_BLOCK_COUNT 4096
/* Default block size in bytes. */
#define DEF_BLOCK_SIZE 4096
/**
 * This macro changes off_t to 64 bits, and ftruncate to 64 bits.
 * It has to be defined before the first system header is included.
 */
#define _FILE_OFFSET_BITS 64
#include <stdint.h> | |
/* | |
* Although I don't really see why I shouldn't use 64-bit versions directly. | |
* fprintf requires the use of PRIu64 or PRIi64 macros to select the proper long type to get at 64 bits. | |
*/ | |
/* Checksum algorithm identifiers as stored in the SPAF file header. */
enum checksum_algos {
    algo_crc32 = 0,
    algo_md5sum
};
/* One-byte boolean and raw byte types used in the on-disk structures. */
typedef unsigned char bool;
typedef unsigned char byte;
/*
 * Header stored once at the very start of a SPAF stream.
 * Packed to 16 bytes; the number after each field is the running byte
 * offset of the end of that field.
 */
struct spaf_h {
    char magic[4];                        // 4: "SPAF"
    char version[2];                      // 6: "10"
    bool hammington_used:8;               // 7: unused. A Hamming code could be used to get a kind
                                          //    of ECC correction without using ECC memory, but
                                          //    would probably be rather slow.
    byte hammington_block_size:8;         // 8: would have to be 247 with 8 parity bits and one
                                          //    unused extra parity, but this is the data part.
                                          //    The full code is (255, 247) and matrices could be
                                          //    downloaded at [1]. The output of the matrix is the
                                          //    8 parity bit position values that can indicate an
                                          //    error. To be error free, the computation of this
                                          //    vector needs to be the zero vector.
    bool checksum_used:8;                 // 9
    unsigned short checksum_bits:16;      // 11
    enum checksum_algos checksum_algo:8;  // 12
    uint32_t block_size:32;               // 16
} __attribute__((packed));
// [1] http://www.uni-kl.de/en/channel-codes/channel-codes-database/bch-and-hamming/
/*
 * Kind of a chunk in the stream: `used` chunks carry block data after
 * their header, `unused` chunks stand for a run of empty blocks.
 */
enum block_type {
    used = 0,
    unused
};
struct chunk_h { | |
enum block_type type:8; | |
unsigned short size:16; | |
byte padding:8; | |
} __attribute__((packed)); | |
struct chunk_h_checksum { | |
enum block_type type:8; | |
unsigned short size:16; | |
unsigned long checksum:32; | |
} __attribute__((packed)); | |
struct spaf_h our_header = { | |
MAGIC, | |
VERSION, | |
false, | |
247, | |
false, | |
32, | |
algo_crc32, | |
4096 | |
}; | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "sparsepack.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <inttypes.h> | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <sys/unistd.h> | |
/*
 * Counters describing what the unpacker has decoded so far.
 * The two total_* fields are derived values; fixup() fills them in.
 */
struct statistics {
    off_t zero_chunks_read;   /* number of zero chunks decoded */
    off_t data_chunks_read;   /* number of data chunks decoded */
    off_t total_chunks_read;  /* zero + data chunks */
    off_t zero_blocks_read;   /* blocks represented by zero chunks */
    off_t data_blocks_read;   /* blocks carried by data chunks */
    off_t total_blocks_read;  /* zero + data blocks */
};
void fixup(struct statistics *s) { | |
s->total_chunks_read = s->zero_chunks_read + s->data_chunks_read; | |
s->total_blocks_read = s->zero_blocks_read + s->data_blocks_read; | |
} | |
/* Classification of the unpacker's output as returned by obtain_file_type(). */
enum file_type {
    ft_invalid = 0,  /* output cannot be written to */
    ft_file,         /* regular file */
    ft_block,        /* block device */
    ft_pipe          /* pipe */
};
enum file_type obtain_file_type() { | |
int fd = fileno(stdout); | |
struct stat buf; | |
fstat(fd, &buf); | |
if (buf.st_mode & (S_IFREG | S_IFLNK)) return ft_file; | |
if (buf.st_mode & S_IFBLK) return ft_block; | |
if (buf.st_mode & S_IFIFO) return ft_pipe; | |
return ft_invalid; | |
} | |
int main(char args[]) { | |
fprintf(stderr, "Sizeof %d\n", sizeof(off_t)); | |
exit(1); | |
char *buf = malloc(DEF_BLOCK_SIZE * MAX_BLOCK_COUNT); | |
struct chunk_h chunk_header; | |
struct spaf_h my_header; | |
struct statistics stats = {0}; | |
char *zero; | |
int read, i, res; | |
enum block_type last; | |
enum file_type stdout_type = obtain_file_type(); | |
fprintf(stderr, "File type is %s.\n", stdout_type == ft_file ? "file" : (stdout_type == ft_block) ? "block device" : "pipe"); | |
if (stdout_type == ft_invalid) { | |
fprintf(stderr, "Cannot write to this output.\n"); | |
return 1; | |
} | |
fread(&my_header, sizeof(struct spaf_h), 1, stdin); | |
if (memcmp(&my_header.magic, MAGIC, 4) == 0 && memcmp(&my_header.version, VERSION, 2) == 0) { | |
fprintf(stderr, "Valid SPAF header found in input stream.\n"); | |
} else { | |
fprintf(stderr, "No valid SPAF header found in input stream.\n"); | |
return 1; | |
} | |
fprintf(stderr, "Block size %d.\n", my_header.block_size); | |
zero = malloc(my_header.block_size); | |
memset(zero, 0, my_header.block_size); | |
while (fread(&chunk_header, sizeof(struct chunk_h), 1, stdin)) { | |
fprintf(stderr, "Decoding %s chunk of %d blocks\n", chunk_header.type == used ? "data" : "zero", chunk_header.size); | |
if (chunk_header.type == used) { | |
read = fread(buf, my_header.block_size, chunk_header.size, stdin); | |
fwrite(buf, my_header.block_size, chunk_header.size, stdout); | |
stats.data_chunks_read += 1; | |
stats.data_blocks_read += chunk_header.size; | |
} else { | |
// if it is a regular file, then seek and truncate at the end, creating a sparse file. | |
if (stdout_type == ft_file || stdout_type == ft_block) { | |
fseek(stdout, chunk_header.size * my_header.block_size, SEEK_CUR); | |
} else { | |
for (i = 0; i < chunk_header.size; i++) { | |
fwrite(zero, my_header.block_size, 1, stdout); | |
} | |
} | |
stats.zero_chunks_read += 1; | |
stats.zero_blocks_read += chunk_header.size; | |
} | |
} | |
fixup(&stats); | |
if (chunk_header.type == unused && stdout_type == ft_file) { | |
fprintf(stderr, "Truncating file at %" PRIu64 " bytes.\n", stats.total_blocks_read * my_header.block_size); | |
res = ftruncate(fileno(stdout), stats.total_blocks_read * my_header.block_size); | |
/*if (res) { | |
fprintf(stderr, "Truncate error.\n"); | |
}*/ | |
} | |
fprintf(stderr, "Total blocks processed: %" PRIu64 ".", stats.total_blocks_read); | |
fprintf(stderr, " Total chunks: %" PRIu64 ".", stats.total_chunks_read); | |
fprintf(stderr, " Zero: %" PRIu64 "/%" PRIu64 ", data: %" PRIu64 "/%" PRIu64 ". Data %: %04.2f\n", | |
stats.zero_blocks_read, | |
stats.zero_chunks_read, | |
stats.data_blocks_read, | |
stats.data_chunks_read, | |
stats.data_blocks_read / (double)stats.total_blocks_read); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment