janisz · January 3, 2016 05:29
diff --git a/lightweight_compresion.cu b/lightweight_compresion.cu
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <cuda.h>

 // Launch configuration used by main: BLOCKS thread blocks of THREADS threads each.
 #define BLOCKS 10
 #define THREADS 100
 // One storeElement is handled per launched thread, so the element count is the thread count.
 #define ELEMENTS_COUNT (BLOCKS*THREADS)
 // Size in bytes of the uncompressed storeElement array.
 #define DATA_SIZE (ELEMENTS_COUNT * sizeof(storeElement))
 // Bytes per packed element as written by EncodeKernel:
 // 1 (tag) + 1 (metric) + 4 (low half of time) + 4 (value).
 #define COMPRESSED_ELEMENT_SIZE 10
 // Packed buffer size: the per-element records plus a 4-byte header (high half of time).
 #define COMPRESSED_DATA_SIZE (ELEMENTS_COUNT*COMPRESSED_ELEMENT_SIZE+4)

 // One sample: a (tag, metric) pair with a 64-bit timestamp and a float value.
 struct storeElement
 {
 	int32_t tag;
 	int32_t metric;
 	int64_t time;
 	float value;

 	// Builds an element from its four fields.
 	storeElement(int32_t _tag, int32_t _metric, int64_t _time, float _value)
 			: tag(_tag), metric(_metric), time(_time), value(_value) {}

 	// Field-by-field equality; `value` is compared exactly, with no tolerance.
 	// const-qualified so it also works on const objects and const references.
 	bool operator==(const storeElement & rhs) const {
 		return metric == rhs.metric && tag == rhs.tag && time == rhs.time && value == rhs.value;
 	}

 	// Negation of operator==.
 	bool operator!=(const storeElement & rhs) const {
 		return !(*this == rhs);
 	}

 	// Writes "metric/tag/time/value" to stdout, without a trailing newline.
 	void print() const {
 		// int64_t is not `long` on every platform, so widen explicitly and use %lld
 		printf("%d/%d/%lld/%f", metric, tag, (long long)time, value);
 	}
 };

 // Four bytes of storage viewed as an int32, a float, or raw bytes.
 // All members alias the same memory; each to*/from* pair is two names for
 // the same view, so callers can write through one and read through another.
 union converter {
   int32_t toInt;
   int32_t fromInt;
   float toFloat;
   float fromFloat;
   unsigned char toBytes[4];
   unsigned char fromBytes[4];
 };

 // Device helper: copies `size` bytes from `source` to `dest`, one byte at a
 // time in ascending address order. Copies nothing when `size` is zero or negative.
 __device__
 void copyBytes(unsigned char * dest, const unsigned char * source, const int size) {
 	for (int remaining = size; remaining > 0; --remaining) {
 		*dest++ = *source++;
 	}
 }

 // Packs one storeElement per thread into COMPRESSED_ELEMENT_SIZE bytes of out_d.
 //
 // Output layout (multi-byte fields use the byte order of the converter union):
 //   bytes 0..3 - high 32 bits of element 0's time, written only by thread 0
 //   then, for each element, at 4 + index*COMPRESSED_ELEMENT_SIZE:
 //     1 byte tag, 1 byte metric, 4 bytes low 32 bits of time, 4 bytes value
 //
 // The encoding is lossy unless every tag and metric fits in one byte and all
 // elements share the high 32 bits of time, since only element 0's is stored.
 //
 // Expects a 1D launch. Threads whose index is >= ELEMENTS_COUNT return
 // without touching memory, so the grid may be rounded up.
 __global__
 void EncodeKernel(storeElement * in_d, unsigned char * out_d) {
 	// blockDim.x rather than THREADS so the index stays right for any block size
 	int index = blockIdx.x*blockDim.x + threadIdx.x;
 	if (index >= ELEMENTS_COUNT) {
 		return;
 	}

 	const storeElement & element = in_d[index];
 	int32_t position = COMPRESSED_ELEMENT_SIZE*index + 4;
 	converter c;

 	if (index == 0) {
 		// shared header: only the first element contributes its high half
 		c.fromInt = (int32_t)((element.time >> 32) & 0xFFFFFFFF);
 		copyBytes(out_d, c.toBytes, 4);
 	}

 	// tag and metric are truncated to their lowest byte
 	out_d[position++] = (unsigned char)element.tag;
 	out_d[position++] = (unsigned char)element.metric;

 	c.fromInt = (int32_t)(element.time & 0xFFFFFFFF);
 	copyBytes(out_d+position, c.toBytes, 4);
 	position += 4;

 	c.fromFloat = element.value;
 	copyBytes(out_d+position, c.toBytes, 4);
 }

 // Reverses EncodeKernel: rebuilds one storeElement per thread from packed bytes.
 //
 // Reads the 4-byte header at in_d[0..3] as the high 32 bits of every
 // element's time, then reads this thread's record at
 // 4 + index*COMPRESSED_ELEMENT_SIZE: 1 byte tag, 1 byte metric,
 // 4 bytes low half of time, 4 bytes value. tag and metric come back in the
 // range 0..255 because each is stored in a single byte.
 //
 // Expects a 1D launch. Threads whose index is >= ELEMENTS_COUNT return
 // without touching memory, so the grid may be rounded up.
 __global__
 void DecodeKernel(unsigned char * in_d, storeElement * out_d) {
 	// blockDim.x rather than THREADS so the index stays right for any block size
 	int index = blockIdx.x*blockDim.x + threadIdx.x;
 	if (index >= ELEMENTS_COUNT) {
 		return;
 	}

 	converter c;
 	copyBytes(c.fromBytes, in_d, 4);
 	// keep both halves unsigned so the shift and OR below never involve a sign bit
 	const uint64_t high = (uint32_t)c.toInt;

 	int32_t position = COMPRESSED_ELEMENT_SIZE*index + 4;
 	out_d[index].tag = in_d[position++];
 	out_d[index].metric = in_d[position++];

 	copyBytes(c.fromBytes, in_d + position, 4);
 	const uint64_t low = (uint32_t)c.toInt;
 	out_d[index].time = (int64_t)((high << 32) | low);
 	position += 4;

 	copyBytes(c.fromBytes, in_d + position, 4);
 	out_d[index].value = c.toFloat;
 }

 #ifndef CUDA_CHECK
 // Evaluates a CUDA runtime call; on failure prints the error and exits.
 #define CUDA_CHECK(call) \
 	do { \
 		cudaError_t status_ = (call); \
 		if (status_ != cudaSuccess) { \
 			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
 					cudaGetErrorString(status_)); \
 			exit(EXIT_FAILURE); \
 		} \
 	} while (0)
 #endif

 // Round-trip self test: fills ELEMENTS_COUNT elements on the host, encodes
 // them on the device, decodes the result, and compares it with the input.
 // Prints "OK" and returns 0 when every element matches; otherwise prints each
 // mismatch and returns 1. Exits with EXIT_FAILURE if any CUDA call fails.
 int main(void)
 {
 	storeElement *input;				//data that will be compressed
 	unsigned char *output;				//space for data to copy from device
 	storeElement *dCompessionInput;		//device pointer to data that will be compressed
 	unsigned char *dCompressionOutput;	//output space for compressed data
 	storeElement *dDecompressionOutput;	//output space for decompressed data

 	//the host buffer receives both the compressed and the decompressed data,
 	//so it has to be large enough for whichever of the two is bigger
 	const size_t outputSize =
 			DATA_SIZE > COMPRESSED_DATA_SIZE ? DATA_SIZE : COMPRESSED_DATA_SIZE;

 	//allocate host memory
 	input = (storeElement*)malloc(DATA_SIZE);
 	output = (unsigned char*)calloc(outputSize, sizeof(unsigned char));
 	if (input == NULL || output == NULL) {
 		fprintf(stderr, "Host memory allocation failed\n");
 		free(input);
 		free(output);
 		return 1;
 	}

 	//prepare data: tag and metric fit in one byte and every timestamp shares
 	//the same high 32 bits, which is what the encoder relies on
 	for (int i=0;i<ELEMENTS_COUNT;i++) {
 		input[i] = storeElement(i%128+1, i%64+1, 21474830000 + i , sin(i));
 	}

 	//allocate device memory
 	CUDA_CHECK(cudaMalloc((void**) &dCompessionInput, DATA_SIZE));
 	CUDA_CHECK(cudaMemcpy(dCompessionInput, input, DATA_SIZE, cudaMemcpyHostToDevice));

 	CUDA_CHECK(cudaMalloc((void**) &dCompressionOutput, COMPRESSED_DATA_SIZE));
 	CUDA_CHECK(cudaMemset(dCompressionOutput, 0, COMPRESSED_DATA_SIZE));

 	CUDA_CHECK(cudaMalloc((void**) &dDecompressionOutput, DATA_SIZE));
 	CUDA_CHECK(cudaMemset(dDecompressionOutput, 0, DATA_SIZE));

 	//compress
 	EncodeKernel<<< BLOCKS, THREADS >>>(dCompessionInput, dCompressionOutput);
 	CUDA_CHECK(cudaGetLastError());		//launch errors are not returned by the launch itself
 	CUDA_CHECK(cudaMemcpy(output, dCompressionOutput, COMPRESSED_DATA_SIZE, cudaMemcpyDeviceToHost));

 	//decompress
 	unsigned char *dDecompessionInput = dCompressionOutput;
 	DecodeKernel<<< BLOCKS, THREADS >>>(dDecompessionInput , dDecompressionOutput);
 	CUDA_CHECK(cudaGetLastError());
 	CUDA_CHECK(cudaMemcpy(output, dDecompressionOutput, DATA_SIZE, cudaMemcpyDeviceToHost));

 	//check
 	int ret = 0;
 	storeElement *actual = (storeElement*)output;
 	for (int i=0;i<ELEMENTS_COUNT;i++) {
 		if (input[i] != actual[i]) {
 			printf("Element at %d should be: ", i);
 			input[i].print();
 			printf(" but is: ");
 			actual[i].print();
 			printf("\n");
 			ret = 1;
 		}
 	}
 	if (ret == 0) printf("OK\n");

 	//free
 	free(input);
 	free(output);
 	CUDA_CHECK(cudaFree(dCompessionInput));
 	CUDA_CHECK(cudaFree(dCompressionOutput));
 	CUDA_CHECK(cudaFree(dDecompressionOutput));

 	return ret;
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>
	#include <cuda.h>

	// Launch configuration used by main: BLOCKS thread blocks of THREADS threads each.
	#define BLOCKS 10
	#define THREADS 100
	// One storeElement is handled per launched thread, so the element count is the thread count.
	#define ELEMENTS_COUNT (BLOCKS*THREADS)
	// Size in bytes of the uncompressed storeElement array.
	#define DATA_SIZE (ELEMENTS_COUNT * sizeof(storeElement))
	// Bytes per packed element as written by EncodeKernel:
	// 1 (tag) + 1 (metric) + 4 (low half of time) + 4 (value).
	#define COMPRESSED_ELEMENT_SIZE 10
	// Packed buffer size: the per-element records plus a 4-byte header (high half of time).
	#define COMPRESSED_DATA_SIZE (ELEMENTS_COUNT*COMPRESSED_ELEMENT_SIZE+4)

	// One sample: a (tag, metric) pair with a 64-bit timestamp and a float value.
	struct storeElement
	{
		int32_t tag;
		int32_t metric;
		int64_t time;
		float value;

		// Builds an element from its four fields.
		storeElement(int32_t _tag, int32_t _metric, int64_t _time, float _value)
				: tag(_tag), metric(_metric), time(_time), value(_value) {}

		// Field-by-field equality; `value` is compared exactly, with no tolerance.
		// const-qualified so it also works on const objects and const references.
		bool operator==(const storeElement & rhs) const {
			return metric == rhs.metric && tag == rhs.tag && time == rhs.time && value == rhs.value;
		}

		// Negation of operator==.
		bool operator!=(const storeElement & rhs) const {
			return !(*this == rhs);
		}

		// Writes "metric/tag/time/value" to stdout, without a trailing newline.
		void print() const {
			// int64_t is not `long` on every platform, so widen explicitly and use %lld
			printf("%d/%d/%lld/%f", metric, tag, (long long)time, value);
		}
	};

	// Four bytes of storage viewed as an int32, a float, or raw bytes.
	// All members alias the same memory; each to*/from* pair is two names for
	// the same view, so callers can write through one and read through another.
	union converter {
		int32_t toInt;
		int32_t fromInt;
		float toFloat;
		float fromFloat;
		unsigned char toBytes[4];
		unsigned char fromBytes[4];
	};

	// Device helper: copies `size` bytes from `source` to `dest`, one byte at a
	// time in ascending address order. Copies nothing when `size` is zero or negative.
	__device__
	void copyBytes(unsigned char * dest, const unsigned char * source, const int size) {
		for (int remaining = size; remaining > 0; --remaining) {
			*dest++ = *source++;
		}
	}

	// Packs one storeElement per thread into COMPRESSED_ELEMENT_SIZE bytes of out_d.
	//
	// Output layout (multi-byte fields use the byte order of the converter union):
	//   bytes 0..3 - high 32 bits of element 0's time, written only by thread 0
	//   then, for each element, at 4 + index*COMPRESSED_ELEMENT_SIZE:
	//     1 byte tag, 1 byte metric, 4 bytes low 32 bits of time, 4 bytes value
	//
	// The encoding is lossy unless every tag and metric fits in one byte and all
	// elements share the high 32 bits of time, since only element 0's is stored.
	//
	// Expects a 1D launch. Threads whose index is >= ELEMENTS_COUNT return
	// without touching memory, so the grid may be rounded up.
	__global__
	void EncodeKernel(storeElement * in_d, unsigned char * out_d) {
		// blockDim.x rather than THREADS so the index stays right for any block size
		int index = blockIdx.x*blockDim.x + threadIdx.x;
		if (index >= ELEMENTS_COUNT) {
			return;
		}

		const storeElement & element = in_d[index];
		int32_t position = COMPRESSED_ELEMENT_SIZE*index + 4;
		converter c;

		if (index == 0) {
			// shared header: only the first element contributes its high half
			c.fromInt = (int32_t)((element.time >> 32) & 0xFFFFFFFF);
			copyBytes(out_d, c.toBytes, 4);
		}

		// tag and metric are truncated to their lowest byte
		out_d[position++] = (unsigned char)element.tag;
		out_d[position++] = (unsigned char)element.metric;

		c.fromInt = (int32_t)(element.time & 0xFFFFFFFF);
		copyBytes(out_d+position, c.toBytes, 4);
		position += 4;

		c.fromFloat = element.value;
		copyBytes(out_d+position, c.toBytes, 4);
	}

	// Reverses EncodeKernel: rebuilds one storeElement per thread from packed bytes.
	//
	// Reads the 4-byte header at in_d[0..3] as the high 32 bits of every
	// element's time, then reads this thread's record at
	// 4 + index*COMPRESSED_ELEMENT_SIZE: 1 byte tag, 1 byte metric,
	// 4 bytes low half of time, 4 bytes value. tag and metric come back in the
	// range 0..255 because each is stored in a single byte.
	//
	// Expects a 1D launch. Threads whose index is >= ELEMENTS_COUNT return
	// without touching memory, so the grid may be rounded up.
	__global__
	void DecodeKernel(unsigned char * in_d, storeElement * out_d) {
		// blockDim.x rather than THREADS so the index stays right for any block size
		int index = blockIdx.x*blockDim.x + threadIdx.x;
		if (index >= ELEMENTS_COUNT) {
			return;
		}

		converter c;
		copyBytes(c.fromBytes, in_d, 4);
		// keep both halves unsigned so the shift and OR below never involve a sign bit
		const uint64_t high = (uint32_t)c.toInt;

		int32_t position = COMPRESSED_ELEMENT_SIZE*index + 4;
		out_d[index].tag = in_d[position++];
		out_d[index].metric = in_d[position++];

		copyBytes(c.fromBytes, in_d + position, 4);
		const uint64_t low = (uint32_t)c.toInt;
		// plain bitwise OR of the two halves
		out_d[index].time = (int64_t)((high << 32) | low);
		position += 4;

		copyBytes(c.fromBytes, in_d + position, 4);
		out_d[index].value = c.toFloat;
	}

	#ifndef CUDA_CHECK
	// Evaluates a CUDA runtime call; on failure prints the error and exits.
	#define CUDA_CHECK(call) \
		do { \
			cudaError_t status_ = (call); \
			if (status_ != cudaSuccess) { \
				fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
						cudaGetErrorString(status_)); \
				exit(EXIT_FAILURE); \
			} \
		} while (0)
	#endif

	// Round-trip self test: fills ELEMENTS_COUNT elements on the host, encodes
	// them on the device, decodes the result, and compares it with the input.
	// Prints "OK" and returns 0 when every element matches; otherwise prints each
	// mismatch and returns 1. Exits with EXIT_FAILURE if any CUDA call fails.
	int main(void)
	{
		storeElement *input; //data that will be compressed
		unsigned char *output; //space for data to copy from device
		storeElement *dCompessionInput; //device pointer to data that will be compressed
		unsigned char *dCompressionOutput; //output space for compressed data
		storeElement *dDecompressionOutput; //output space for decompressed data

		//the host buffer receives both the compressed and the decompressed data,
		//so it has to be large enough for whichever of the two is bigger
		const size_t outputSize =
				DATA_SIZE > COMPRESSED_DATA_SIZE ? DATA_SIZE : COMPRESSED_DATA_SIZE;

		//allocate host memory
		input = (storeElement*)malloc(DATA_SIZE);
		output = (unsigned char*)calloc(outputSize, sizeof(unsigned char));
		if (input == NULL || output == NULL) {
			fprintf(stderr, "Host memory allocation failed\n");
			free(input);
			free(output);
			return 1;
		}

		//prepare data: tag and metric fit in one byte and every timestamp shares
		//the same high 32 bits, which is what the encoder relies on
		for (int i=0;i<ELEMENTS_COUNT;i++) {
			input[i] = storeElement(i%128+1, i%64+1, 21474830000 + i , sin(i));
		}

		//allocate device memory
		CUDA_CHECK(cudaMalloc((void**) &dCompessionInput, DATA_SIZE));
		CUDA_CHECK(cudaMemcpy(dCompessionInput, input, DATA_SIZE, cudaMemcpyHostToDevice));

		CUDA_CHECK(cudaMalloc((void**) &dCompressionOutput, COMPRESSED_DATA_SIZE));
		CUDA_CHECK(cudaMemset(dCompressionOutput, 0, COMPRESSED_DATA_SIZE));

		CUDA_CHECK(cudaMalloc((void**) &dDecompressionOutput, DATA_SIZE));
		CUDA_CHECK(cudaMemset(dDecompressionOutput, 0, DATA_SIZE));

		//compress
		EncodeKernel<<< BLOCKS, THREADS >>>(dCompessionInput, dCompressionOutput);
		CUDA_CHECK(cudaGetLastError()); //launch errors are not returned by the launch itself
		CUDA_CHECK(cudaMemcpy(output, dCompressionOutput, COMPRESSED_DATA_SIZE, cudaMemcpyDeviceToHost));

		//decompress
		unsigned char *dDecompessionInput = dCompressionOutput;
		DecodeKernel<<< BLOCKS, THREADS >>>(dDecompessionInput , dDecompressionOutput);
		CUDA_CHECK(cudaGetLastError());
		CUDA_CHECK(cudaMemcpy(output, dDecompressionOutput, DATA_SIZE, cudaMemcpyDeviceToHost));

		//check: view the copied-back bytes as an array of storeElement
		int ret = 0;
		storeElement *actual = (storeElement*)output;
		for (int i=0;i<ELEMENTS_COUNT;i++) {
			if (input[i] != actual[i]) {
				printf("Element at %d should be: ", i);
				input[i].print();
				printf(" but is: ");
				actual[i].print();
				printf("\n");
				ret = 1;
			}
		}
		if (ret == 0) printf("OK\n");

		//free
		free(input);
		free(output);
		CUDA_CHECK(cudaFree(dCompessionInput));
		CUDA_CHECK(cudaFree(dCompressionOutput));
		CUDA_CHECK(cudaFree(dDecompressionOutput));

		return ret;
	}