drydenp · May 29, 2017 16:32
diff --git a/sparsepack.c b/sparsepack.c
 #include "sparsepack.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <inttypes.h>



 /*
  * Packer state shared between main() and the chunk-writing helpers.
  */
 struct context {
 	char *cur_buf;         // position in the block buffer the next block is read into
 	struct chunk_h header; // scratch chunk header, filled in right before a chunk is written
 	int buf_read;  // buf_read means data read: data blocks buffered but not yet written
 	int zero_read; // zero read means empty blocks read: zero blocks counted but not yet written
 	int block_size;        // size of one block in bytes
 };

 /*
  * Output counters of the packer. The zero_* and data_* members are
  * incremented by the chunk writers; the total_* members are only valid
  * after fixup() has been called.
  */
 struct statistics {
 	off_t zero_chunks_written;  // number of "unused" (zero) chunk headers written
 	off_t data_chunks_written;  // number of "used" (data) chunks written
 	off_t total_chunks_written; // zero + data chunks, computed by fixup()

 	off_t zero_blocks_written;  // blocks represented by zero chunks
 	off_t data_blocks_written;  // blocks written inside data chunks
 	off_t total_blocks_written; // zero + data blocks, computed by fixup()
 };

 /*
  * Derive the total_* counters of *s from its zero_* and data_* counters.
  */
 void fixup(struct statistics *s) {
 	off_t chunks = s->zero_chunks_written;
 	off_t blocks = s->zero_blocks_written;

 	chunks += s->data_chunks_written;
 	blocks += s->data_blocks_written;

 	s->total_chunks_written = chunks;
 	s->total_blocks_written = blocks;
 }

 // Size in bytes of the blocks main() reads from stdin; starts out as DEF_BLOCK_SIZE.
 int block_size = DEF_BLOCK_SIZE;

 /*
  * Report whether a block contains at least one non-zero byte.
  *
  * chunk: start of the block to inspect.
  * size:  number of bytes to inspect; zero or a negative value yields false.
  *
  * Returns true if any of the first `size` bytes is non-zero, false if all
  * of them are zero.
  */
 bool has_data(char* chunk, int size) {
 	int i;

 	// Scan byte by byte. The previous word-wise loop decremented the
 	// counter before testing it, so the last word was never examined, and
 	// it never terminated inside the buffer when size was 0 or not a
 	// multiple of sizeof(int). Reading bytes also avoids accessing the
 	// char buffer through an int pointer.
 	for (i = 0; i < size; i++) {
 		if (chunk[i] != 0) {
 			return true;
 		}
 	}
 	return false;
 }

 /*
  * Write a header-only "unused" chunk to stdout that stands for the
  * ctx->zero_read zero blocks counted so far, log it on stderr, add it to
  * the statistics and reset ctx->zero_read to 0.
  */
 void write_zero_header(struct context *ctx, struct statistics *stats) {
 	const int pending = ctx->zero_read;

 	ctx->header.size = pending;
 	ctx->header.type = unused;
 	fprintf(stderr, "Writing zero chunk of %d blocks\n", pending);
 	fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);

 	stats->zero_blocks_written += pending;
 	stats->zero_chunks_written += 1;

 	ctx->zero_read = 0;
 }

 /*
  * Write a "used" chunk to stdout: the chunk header followed by the
  * ctx->buf_read blocks stored at full_buf. Logs the chunk on stderr,
  * updates the statistics, resets ctx->buf_read to 0 and rewinds
  * ctx->cur_buf to full_buf so the buffer is reused from its start.
  */
 void write_data_header(struct context *ctx, char *full_buf, struct statistics *stats) {
 	const int pending = ctx->buf_read;

 	ctx->header.size = pending;
 	ctx->header.type = used;
 	fprintf(stderr, "Writing data chunk of %d blocks\n", pending);
 	fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);
 	fwrite(full_buf, ctx->block_size, pending, stdout);

 	stats->data_blocks_written += pending;
 	stats->data_chunks_written += 1;

 	ctx->cur_buf = full_buf;
 	ctx->buf_read = 0;
 }

 // Packer-wide output counters; every member starts at zero.
 struct statistics stats = {0};

 /*
  * Pack stdin into a SPAF stream on stdout.
  *
  * Input is consumed in blocks of block_size bytes. Runs of all-zero blocks
  * are replaced by header-only "unused" chunks; runs of other blocks are
  * buffered and written as "used" chunks of at most MAX_BLOCK_COUNT blocks.
  * Progress and a final summary are printed to stderr.
  *
  * Returns 0 on success, 1 if the block buffer cannot be allocated.
  */
 int main(void) {
 	char *buf = malloc(block_size * MAX_BLOCK_COUNT);
 	struct context ctx = {0};

 	if (buf == NULL) {
 		fprintf(stderr, "Cannot allocate block buffer.\n");
 		return 1;
 	}

 	ctx.cur_buf = buf;
 	ctx.block_size = block_size;

 	// so what are our cases?
 	// 1. zero blocks are pending and we read a data block.
 	// 2. no zero blocks are pending and we read a data block.
 	// 3. no data blocks are pending and we read a zero block.
 	// 4. data blocks are pending and we read a zero block.

 	// this code will fail to read a partial block at the end: fread() only
 	// reports complete blocks, so trailing bytes are dropped.

 	fwrite(&our_header, sizeof(struct spaf_h), 1, stdout);

 	while (fread(ctx.cur_buf, block_size, 1, stdin) == 1) {
 		// scan the block for data
 		if (has_data(ctx.cur_buf, block_size)) {
 			if (ctx.zero_read > 0) {
 				// case 1: flush the zero run; the block just read is the
 				// first one of a new data run and sits at the buffer start.
 				write_zero_header(&ctx, &stats);
 				ctx.buf_read = 1;
 				ctx.cur_buf += block_size;
 			} else {
 				// case 2: extend the data run.
 				ctx.buf_read++;
 				if (ctx.buf_read == MAX_BLOCK_COUNT) {
 					// buffer is full: write out the chunk
 					write_data_header(&ctx, buf, &stats);
 				} else {
 					ctx.cur_buf += block_size;
 				}
 			}
 		} else {
 			if (ctx.buf_read > 0) {
 				// case 4: flush the data run; the zero block just read
 				// starts a new zero run.
 				write_data_header(&ctx, buf, &stats);
 				ctx.zero_read = 1;
 			} else {
 				// case 3: extend the zero run.
 				ctx.zero_read++;
 				// The chunk size field is an unsigned short, so a run is
 				// flushed when it reaches the largest value that fits.
 				// for 4k blocks this is 64k * 4k = 256MB.
 				// that's not a lot...
 				if (ctx.zero_read == (1 << (sizeof(unsigned short) * 8)) - 1) {
 					write_zero_header(&ctx, &stats);
 				}
 			}
 		}
 	}

 	// Flush the run that is still pending, if any. Nothing is pending for
 	// empty input or when the last block completed a full chunk, and in
 	// that case no empty data chunk is emitted.
 	if (ctx.zero_read > 0) {
 		write_zero_header(&ctx, &stats);
 	} else if (ctx.buf_read > 0) {
 		write_data_header(&ctx, buf, &stats);
 	}

 	fixup(&stats);

 	// The counters are off_t; convert explicitly so that every argument
 	// matches its PRIu64 conversion.
 	fprintf(stderr,
 		"Bytes read %" PRIu64 ", data chunks written %" PRIu64
 		" totalling %" PRIu64 " blocks and %" PRIu64 " bytes."
 		" Zero chunks written %" PRIu64 " totalling %" PRIu64
 		" blocks and %" PRIu64 " bytes."
 		" Total blocks written %" PRIu64 " and total chunks %" PRIu64 ".\n",
 		(uint64_t)(stats.total_blocks_written * block_size),
 		(uint64_t)stats.data_chunks_written,
 		(uint64_t)stats.data_blocks_written,
 		(uint64_t)(stats.data_blocks_written * block_size),
 		(uint64_t)stats.zero_chunks_written,
 		(uint64_t)stats.zero_blocks_written,
 		(uint64_t)(stats.zero_blocks_written * block_size),
 		(uint64_t)stats.total_blocks_written,
 		(uint64_t)stats.total_chunks_written);

 	free(buf);
 	return 0;
 }

diff --git a/sparsepack.h b/sparsepack.h
 // Signature and format version that open every stream; they initialise the
 // magic and version members of our_header.
 #define MAGIC "SPAF"
 #define VERSION "10"

 // Boolean constants that go with the `bool` typedef declared in this header.
 #define false 0
 #define true 1

 // MAX_BLOCK_COUNT: number of blocks the packer buffers before it writes a
 // data chunk. DEF_BLOCK_SIZE: default block size in bytes.
 #define MAX_BLOCK_COUNT 4096
 #define DEF_BLOCK_SIZE 4096

 /**
 *  This macro changes off_t to 64 bits, and ftruncate to 64 bits.
 *  NOTE(review): feature-test macros normally have to be defined before the
 *  first system header is included, so this header presumably must come
 *  first in every source file -- confirm.
 */

 #define _FILE_OFFSET_BITS 64

 #include <stdint.h>

 /*
 *  Although I don't really see why I shouldn't use 64-bit versions directly.
 *  fprintf requires the use of PRIu64 or PRIi64 macros to select the proper long type to get at 64 bits.
 */

 // Checksum algorithm identifiers for the checksum_algo member of struct spaf_h.
 enum checksum_algos {
 	algo_crc32 = 0,
 	algo_md5sum
 };

 // One-byte boolean and raw-byte types used by the header structures.
 // NOTE(review): `bool`, `true` and `false` as defined in this header would
 // clash with <stdbool.h> if a source file included both -- confirm none does.
 typedef unsigned char bool;
 typedef unsigned char byte;

 /*
  * File header written once at the start of a stream. The structure is
  * packed and its members add up to 16 bytes; the number after each of the
  * last four members is the running byte offset at the end of that member.
  */
 struct spaf_h {                         // 16 bytes
 	char magic[4];                      // "SPAF"
 	char version[2];                    // "10"
 	bool hammington_used:8;             // Hamming-code flag. Unused; could be used to get a kind of ECC
 	                                    // correction without using ECC memory, but would probably be
 	                                    // rather slow.
 	byte hammington_block_size:8;       // Data bits per Hamming block: would have to be 247, with 8 parity
 	                                    // bits and one unused extra parity bit. The full code is
 	                                    // (255, 247) and matrices could be downloaded at [1].
 	                                    // The output of the matrix is the 8 parity bit position values
 	                                    // that can indicate an error. To be error free, the computation
 	                                    // of this vector needs to be the zero vector.

 	bool checksum_used:8;                 //   9
 	unsigned short checksum_bits:16;      //  11
 	enum checksum_algos checksum_algo:8;  //  12
 	uint32_t block_size:32;          //  16: block size in bytes
 } __attribute__((packed));

 // [1] http://www.uni-kl.de/en/channel-codes/channel-codes-database/bch-and-hamming/

 // Chunk kinds stored in the type member of a chunk header.
 enum block_type {
 	used = 0,   // data chunk: the header is followed by the blocks themselves
 	unused      // zero chunk: header only, stands for a run of all-zero blocks
 };

 /*
  * Header written in front of every chunk. Packed; the members add up to
  * 32 bits.
  */
 struct chunk_h {

 	enum block_type type:8;   // used or unused
 	unsigned short size:16;   // number of blocks the chunk stands for
 	byte padding:8;           // filler; never assigned explicitly by the visible code

 } __attribute__((packed));

 /*
  * Chunk header variant carrying a 32-bit checksum in place of the padding
  * byte. Not referenced by the visible code; presumably reserved for a
  * checksummed stream format -- confirm.
  */
 struct chunk_h_checksum {
 	enum block_type type:8;
 	unsigned short size:16;
 	unsigned long checksum:32;
 } __attribute__((packed));
 	


 struct spaf_h our_header = {
 	MAGIC,
 	VERSION,
 	false,
 	247,
 	false,
 	32,
 	algo_crc32,
 	4096
 };


diff --git a/sparseunpack.c b/sparseunpack.c
 #include "sparsepack.h"

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <inttypes.h>

 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>

 /*
  * Input counters of the unpacker. The zero_* and data_* members are
  * updated while chunks are decoded; the total_* members are only valid
  * after fixup() has been called.
  */
 struct statistics {
 	off_t zero_chunks_read;  // number of zero chunks decoded
 	off_t data_chunks_read;  // number of data chunks decoded
 	off_t total_chunks_read; // zero + data chunks, computed by fixup()

 	off_t zero_blocks_read;  // blocks represented by zero chunks
 	off_t data_blocks_read;  // blocks carried by data chunks
 	off_t total_blocks_read; // zero + data blocks, computed by fixup()
 };

 /*
  * Derive the total_* counters of *s from its zero_* and data_* counters.
  */
 void fixup(struct statistics *s) {
 	off_t chunks = s->zero_chunks_read;
 	off_t blocks = s->zero_blocks_read;

 	chunks += s->data_chunks_read;
 	blocks += s->data_blocks_read;

 	s->total_chunks_read = chunks;
 	s->total_blocks_read = blocks;
 }

 // Kind of object stdout is connected to, as reported by obtain_file_type().
 enum file_type {
 	ft_invalid = 0, // main() refuses to write to this output
 	ft_file,        // zero chunks are skipped by seeking
 	ft_block,       // zero chunks are skipped by seeking
 	ft_pipe         // zero chunks are written out as zero blocks
 };

 /*
  * Classify the object stdout is connected to.
  *
  * Returns ft_file for a regular file, ft_block for a block device,
  * ft_pipe for a FIFO or another sequential stream (character device,
  * socket), and ft_invalid if fstat() fails or the type is none of these.
  */
 enum file_type obtain_file_type() {
 	struct stat buf;

 	if (fstat(fileno(stdout), &buf) != 0) return ft_invalid;

 	// The S_IFxxx constants are type codes inside the S_IFMT field, not
 	// independent flag bits, so they must not be tested with a plain `&`:
 	// doing so classified character devices and sockets as regular files.
 	// The S_ISxxx macros compare the whole type field.
 	if (S_ISREG(buf.st_mode)) return ft_file;
 	if (S_ISBLK(buf.st_mode)) return ft_block;

 	// Sequential outputs are handled like a pipe, i.e. zero runs are
 	// written out explicitly instead of being skipped by seeking.
 	if (S_ISFIFO(buf.st_mode) || S_ISCHR(buf.st_mode)) return ft_pipe;
 #ifdef S_ISSOCK
 	if (S_ISSOCK(buf.st_mode)) return ft_pipe;
 #endif
 	return ft_invalid;
 }


 int main(char args[]) {
 	fprintf(stderr, "Sizeof %d\n", sizeof(off_t));
 	exit(1);

 	char *buf = malloc(DEF_BLOCK_SIZE * MAX_BLOCK_COUNT);
 	struct chunk_h chunk_header;

 	struct spaf_h my_header;
 	struct statistics stats = {0};

 	char *zero;

 	int read, i, res;
 	enum block_type last;

 	enum file_type stdout_type = obtain_file_type();

 	fprintf(stderr, "File type is %s.\n", stdout_type == ft_file ? "file" : (stdout_type == ft_block) ? "block device" : "pipe");

 	if (stdout_type == ft_invalid) {
 		fprintf(stderr, "Cannot write to this output.\n");
 		return 1;
 	}

 	fread(&my_header, sizeof(struct spaf_h), 1, stdin);

 	if (memcmp(&my_header.magic, MAGIC, 4) == 0 && memcmp(&my_header.version, VERSION, 2) == 0) {
 		fprintf(stderr, "Valid SPAF header found in input stream.\n");
 	} else {
 		fprintf(stderr, "No valid SPAF header found in input stream.\n");
 		return 1;
 	}

 	fprintf(stderr, "Block size %d.\n", my_header.block_size);

 	zero = malloc(my_header.block_size);
 	memset(zero, 0, my_header.block_size);

 	while (fread(&chunk_header, sizeof(struct chunk_h), 1, stdin)) {
 		fprintf(stderr, "Decoding %s chunk of %d blocks\n", chunk_header.type == used ? "data" : "zero", chunk_header.size);

 		if (chunk_header.type == used) {
 			read = fread(buf, my_header.block_size, chunk_header.size, stdin);
 			fwrite(buf, my_header.block_size, chunk_header.size, stdout);
 			stats.data_chunks_read += 1;
 			stats.data_blocks_read += chunk_header.size;
 		} else {
 			// if it is a regular file, then seek and truncate at the end, creating a sparse file.
 			if (stdout_type == ft_file || stdout_type == ft_block) {
 				fseek(stdout, chunk_header.size * my_header.block_size, SEEK_CUR);
 			} else {
 				for (i = 0; i < chunk_header.size; i++) {
 					fwrite(zero, my_header.block_size, 1, stdout);
 				}
 			}
 			stats.zero_chunks_read += 1;
 			stats.zero_blocks_read += chunk_header.size;
 		}
 	}
 	fixup(&stats);

 	if (chunk_header.type == unused && stdout_type == ft_file) {
 		fprintf(stderr, "Truncating file at %" PRIu64 " bytes.\n", stats.total_blocks_read * my_header.block_size);
 		res = ftruncate(fileno(stdout), stats.total_blocks_read * my_header.block_size);
 		/*if (res) {
 			fprintf(stderr, "Truncate error.\n");
 		}*/
 	}

 	fprintf(stderr, "Total blocks processed: %" PRIu64 ".", stats.total_blocks_read);
 	fprintf(stderr, " Total chunks: %" PRIu64 ".", stats.total_chunks_read);
 	fprintf(stderr, " Zero: %" PRIu64 "/%" PRIu64 ", data: %" PRIu64 "/%" PRIu64 ". Data %: %04.2f\n", 
 		stats.zero_blocks_read,
 		stats.zero_chunks_read,
 		stats.data_blocks_read,
 		stats.data_chunks_read,
 		stats.data_blocks_read / (double)stats.total_blocks_read);
 }
	#include "sparsepack.h"

	#include <stdio.h>
	#include <stdlib.h>
	#include <inttypes.h>



	// Packer state shared between main() and the chunk-writing helpers.
	struct context {
	char *cur_buf; // position in the block buffer the next block is read into
	struct chunk_h header; // scratch chunk header, filled in right before a chunk is written
	int buf_read; // buf_read means data read: data blocks buffered but not yet written
	int zero_read; // zero read means empty blocks read: zero blocks counted but not yet written
	int block_size; // size of one block in bytes
	};

	// Output counters of the packer. The total_* members are only valid
	// after fixup() has been called.
	struct statistics {
	off_t zero_chunks_written; // number of "unused" (zero) chunk headers written
	off_t data_chunks_written; // number of "used" (data) chunks written
	off_t total_chunks_written; // zero + data chunks, computed by fixup()

	off_t zero_blocks_written; // blocks represented by zero chunks
	off_t data_blocks_written; // blocks written inside data chunks
	off_t total_blocks_written; // zero + data blocks, computed by fixup()
	};

	/*
	 * Derive the total_* counters of *s from its zero_* and data_* counters.
	 */
	void fixup(struct statistics *s) {
		off_t chunks = s->zero_chunks_written;
		off_t blocks = s->zero_blocks_written;

		chunks += s->data_chunks_written;
		blocks += s->data_blocks_written;

		s->total_chunks_written = chunks;
		s->total_blocks_written = blocks;
	}

	// Size in bytes of the blocks main() reads from stdin; starts out as DEF_BLOCK_SIZE.
	int block_size = DEF_BLOCK_SIZE;

	/*
	 * Report whether a block contains at least one non-zero byte.
	 *
	 * chunk: start of the block to inspect.
	 * size:  number of bytes to inspect; zero or a negative value yields false.
	 *
	 * Returns true if any of the first `size` bytes is non-zero, false if all
	 * of them are zero.
	 */
	bool has_data(char* chunk, int size) {
		int i;

		// Inspect the bytes the pointer refers to. The previous test looked
		// at the pointer value itself, which is non-zero for every valid
		// buffer, and its loop skipped the last word and never terminated
		// inside the buffer when size was 0 or not a multiple of sizeof(int).
		for (i = 0; i < size; i++) {
			if (chunk[i] != 0) {
				return true;
			}
		}
		return false;
	}

	void write_zero_header(struct context ctx, struct statistics stats) {
	ctx->header.type = unused;
	ctx->header.size = ctx->zero_read;
	fprintf(stderr, "Writing zero chunk of %d blocks\n", ctx->zero_read);
	fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);

	stats->zero_chunks_written++;
	stats->zero_blocks_written += ctx->zero_read;

	ctx->zero_read = 0;
	}

	void write_data_header(struct context ctx, char full_buf, struct statistics *stats) {
	ctx->header.type = used;
	ctx->header.size = ctx->buf_read;
	fprintf(stderr, "Writing data chunk of %d blocks\n", ctx->buf_read);
	fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);
	fwrite(full_buf, ctx->block_size, ctx->buf_read, stdout);

	stats->data_chunks_written++;
	stats->data_blocks_written += ctx->buf_read;

	ctx->buf_read = 0;
	ctx->cur_buf = full_buf;
	}

	// Packer-wide output counters; every member starts at zero.
	struct statistics stats = {0};

	/*
	 * Pack stdin into a SPAF stream on stdout.
	 *
	 * Input is consumed in blocks of block_size bytes. Runs of all-zero blocks
	 * are replaced by header-only "unused" chunks; runs of other blocks are
	 * buffered and written as "used" chunks of at most MAX_BLOCK_COUNT blocks.
	 * Progress and a final summary are printed to stderr.
	 *
	 * Returns 0 on success, 1 if the block buffer cannot be allocated.
	 */
	int main(void) {
		char *buf = malloc(block_size * MAX_BLOCK_COUNT);
		struct context ctx = {0};

		if (buf == NULL) {
			fprintf(stderr, "Cannot allocate block buffer.\n");
			return 1;
		}

		ctx.cur_buf = buf;
		ctx.block_size = block_size;

		// so what are our cases?
		// 1. zero blocks are pending and we read a data block.
		// 2. no zero blocks are pending and we read a data block.
		// 3. no data blocks are pending and we read a zero block.
		// 4. data blocks are pending and we read a zero block.

		// this code will fail to read a partial block at the end: fread() only
		// reports complete blocks, so trailing bytes are dropped.

		fwrite(&our_header, sizeof(struct spaf_h), 1, stdout);

		while (fread(ctx.cur_buf, block_size, 1, stdin) == 1) {
			// scan the block for data
			if (has_data(ctx.cur_buf, block_size)) {
				if (ctx.zero_read > 0) {
					// case 1: flush the zero run; the block just read is the
					// first one of a new data run and sits at the buffer start.
					write_zero_header(&ctx, &stats);
					ctx.buf_read = 1;
					ctx.cur_buf += block_size;
				} else {
					// case 2: extend the data run.
					ctx.buf_read++;
					if (ctx.buf_read == MAX_BLOCK_COUNT) {
						// buffer is full: write out the chunk
						write_data_header(&ctx, buf, &stats);
					} else {
						ctx.cur_buf += block_size;
					}
				}
			} else {
				if (ctx.buf_read > 0) {
					// case 4: flush the data run; the zero block just read
					// starts a new zero run.
					write_data_header(&ctx, buf, &stats);
					ctx.zero_read = 1;
				} else {
					// case 3: extend the zero run.
					ctx.zero_read++;
					// The chunk size field is an unsigned short, so a run is
					// flushed when it reaches the largest value that fits.
					// for 4k blocks this is 64k * 4k = 256MB.
					// that's not a lot...
					if (ctx.zero_read == (1 << (sizeof(unsigned short) * 8)) - 1) {
						write_zero_header(&ctx, &stats);
					}
				}
			}
		}

		// Flush the run that is still pending, if any. Nothing is pending for
		// empty input or when the last block completed a full chunk, and in
		// that case no empty data chunk is emitted.
		if (ctx.zero_read > 0) {
			write_zero_header(&ctx, &stats);
		} else if (ctx.buf_read > 0) {
			write_data_header(&ctx, buf, &stats);
		}

		fixup(&stats);

		// The counters are off_t; convert explicitly so that every argument
		// matches its PRIu64 conversion.
		fprintf(stderr,
			"Bytes read %" PRIu64 ", data chunks written %" PRIu64
			" totalling %" PRIu64 " blocks and %" PRIu64 " bytes."
			" Zero chunks written %" PRIu64 " totalling %" PRIu64
			" blocks and %" PRIu64 " bytes."
			" Total blocks written %" PRIu64 " and total chunks %" PRIu64 ".\n",
			(uint64_t)(stats.total_blocks_written * block_size),
			(uint64_t)stats.data_chunks_written,
			(uint64_t)stats.data_blocks_written,
			(uint64_t)(stats.data_blocks_written * block_size),
			(uint64_t)stats.zero_chunks_written,
			(uint64_t)stats.zero_blocks_written,
			(uint64_t)(stats.zero_blocks_written * block_size),
			(uint64_t)stats.total_blocks_written,
			(uint64_t)stats.total_chunks_written);

		free(buf);
		return 0;
	}
	// Signature and format version that open every stream; they initialise the
	// magic and version members of our_header.
	#define MAGIC "SPAF"
	#define VERSION "10"

	// Boolean constants that go with the `bool` typedef declared in this header.
	#define false 0
	#define true 1

	// MAX_BLOCK_COUNT: number of blocks the packer buffers before it writes a
	// data chunk. DEF_BLOCK_SIZE: default block size in bytes.
	#define MAX_BLOCK_COUNT 4096
	#define DEF_BLOCK_SIZE 4096

	/**
	* This macro changes off_t to 64 bits, and ftruncate to 64 bits.
	* NOTE(review): feature-test macros normally have to be defined before the
	* first system header is included, so this header presumably must come
	* first in every source file -- confirm.
	*/

	#define _FILE_OFFSET_BITS 64

	#include <stdint.h>

	/*
	* Although I don't really see why I shouldn't use 64-bit versions directly.
	* fprintf requires the use of PRIu64 or PRIi64 macros to select the proper long type to get at 64 bits.
	*/

	// Checksum algorithm identifiers for the checksum_algo member of struct spaf_h.
	enum checksum_algos {
	algo_crc32 = 0,
	algo_md5sum
	};

	// One-byte boolean and raw-byte types used by the header structures.
	// NOTE(review): `bool`, `true` and `false` as defined in this header would
	// clash with <stdbool.h> if a source file included both -- confirm none does.
	typedef unsigned char bool;
	typedef unsigned char byte;

	// File header written once at the start of a stream. The structure is
	// packed and its members add up to 16 bytes; the number after each of the
	// last four members is the running byte offset at the end of that member.
	struct spaf_h { // 16 bytes
	char magic[4]; // "SPAF"
	char version[2]; // "10"
	bool hammington_used:8; // Hamming-code flag. Unused; could be used to get a kind of ECC correction
	// without using ECC memory, but would probably be rather slow.
	byte hammington_block_size:8; // Data bits per Hamming block: would have to be 247, with 8 parity bits
	// and one unused extra parity bit. The full code is (255, 247) and matrices
	// could be downloaded at [1].
	// The output of the matrix is the 8 parity bit position values that can
	// indicate an error. To be error free, the computation of this vector
	// needs to be the zero vector.

	bool checksum_used:8; // 9
	unsigned short checksum_bits:16; // 11
	enum checksum_algos checksum_algo:8; // 12
	uint32_t block_size:32; // 16: block size in bytes
	} __attribute__((packed));

	// [1] http://www.uni-kl.de/en/channel-codes/channel-codes-database/bch-and-hamming/

	// Chunk kinds stored in the type member of a chunk header.
	enum block_type {
	used = 0, // data chunk: the header is followed by the blocks themselves
	unused // zero chunk: header only, stands for a run of all-zero blocks
	};

	// Header written in front of every chunk. Packed; the members add up to
	// 32 bits.
	struct chunk_h {

	enum block_type type:8; // used or unused
	unsigned short size:16; // number of blocks the chunk stands for
	byte padding:8; // filler; never assigned explicitly by the visible code

	} __attribute__((packed));

	// Chunk header variant carrying a 32-bit checksum in place of the padding
	// byte. Not referenced by the visible code; presumably reserved for a
	// checksummed stream format -- confirm.
	struct chunk_h_checksum {
	enum block_type type:8;
	unsigned short size:16;
	unsigned long checksum:32;
	} __attribute__((packed));



	struct spaf_h our_header = {
	MAGIC,
	VERSION,
	false,
	247,
	false,
	32,
	algo_crc32,
	4096
	};