-
-
Save dzitkowskik/8416722 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
// Number of storeElement records handled by the demo; main() launches one
// GPU thread per record.
#define ELEMENTS_COUNT 10
// Size in bytes of ELEMENTS_COUNT uncompressed storeElement records. main()
// also uses it as the size of the compressed-output buffers.
#define DATA_SIZE (sizeof(storeElement) * ELEMENTS_COUNT)
/*
 * One record of the demo data set: two 32-bit integer fields, a 64-bit time
 * field and a single-precision value.
 */
struct storeElement
{
    int32_t tag;     // stored by EncodeKernel as a single byte
    int32_t metric;  // stored by EncodeKernel as a single byte
    int64_t time;    // split into a high and a low 32-bit half when encoded
    float value;     // encoded as its raw 4-byte representation

    /* Builds a record from its four field values. */
    storeElement(int32_t tagIn, int32_t metricIn, int64_t timeIn, float valueIn)
    {
        tag = tagIn;
        metric = metricIn;
        time = timeIn;
        value = valueIn;
    }
};
/*
 * Four bytes of storage viewed as an int32_t, a float or a raw byte array.
 * All members overlap; the to and from prefixed names are aliases of the same
 * storage that only make call sites read as "write fromX, read toY".
 */
union converter {
    int32_t toInt;
    int32_t fromInt;
    float toFloat;
    float fromFloat;
    unsigned char toBytes[4];
    unsigned char fromBytes[4];
};
/*
 * Copies `size` bytes from `source` to `dest`, one byte at a time in
 * ascending address order. Nothing is copied when `size` is zero or negative.
 *
 * Compiled for both host and device so that the same packing helper can be
 * used by the kernels and by CPU-side reference checks.
 *
 * dest   - destination buffer with room for at least `size` bytes
 * source - source buffer holding at least `size` readable bytes
 * size   - number of bytes to copy
 */
__host__ __device__
void copyBytes(unsigned char * dest, const unsigned char * source, const int size) {
    for (int i = 0; i < size; ++i) {
        dest[i] = source[i];
    }
}
/*
 * Packs storeElement records into a compact byte stream, one thread per
 * record.
 *
 * Output layout:
 *   bytes 0..3              upper 32 bits of element 0's `time`, written by
 *                           the thread with global index 0 only; the upper
 *                           halves of the other elements are not stored
 *   bytes 4+10*i .. 13+10*i record i: tag (1 byte), metric (1 byte),
 *                           lower 32 bits of time (4 bytes), value (4 bytes)
 *
 * Multi-byte fields are written in the device's in-memory byte order.
 * `tag` and `metric` are truncated to their lowest 8 bits.
 *
 * Launch: 1D grid with at least ELEMENTS_COUNT threads in total; threads with
 * a global index >= ELEMENTS_COUNT do nothing.
 *
 * in_d  - device pointer to at least ELEMENTS_COUNT records
 * out_d - device pointer to at least 4 + 10 * ELEMENTS_COUNT bytes
 */
__global__
void EncodeKernel(storeElement * in_d, unsigned char * out_d) {
    const int headerBytes = 4;   // shared upper half of `time`
    const int recordBytes = 10;  // tag + metric + low time + value

    // Global index so the kernel is also correct for multi-block launches.
    const int index = (int)(blockIdx.x * blockDim.x + threadIdx.x);
    // Surplus threads must not read or write past the ELEMENTS_COUNT records.
    if (index >= ELEMENTS_COUNT) {
        return;
    }

    const storeElement &element = in_d[index];
    converter c;

    // Only the first thread emits the header.
    if (index == 0) {
        c.fromInt = (int32_t)((element.time >> 32) & 0xFFFFFFFF);
        copyBytes(out_d, c.toBytes, headerBytes);
    }

    int32_t position = recordBytes * index + headerBytes;
    out_d[position++] = (unsigned char)element.tag;
    out_d[position++] = (unsigned char)element.metric;

    // Lower half of the time field.
    c.fromInt = (int32_t)(element.time & 0xFFFFFFFF);
    copyBytes(out_d + position, c.toBytes, 4);
    position += 4;

    // Raw bit pattern of the float value.
    c.fromFloat = element.value;
    copyBytes(out_d + position, c.toBytes, 4);
}
/*
 * Rebuilds storeElement records from the byte stream produced by
 * EncodeKernel, one thread per record.
 *
 * Expected input layout:
 *   bytes 0..3              upper 32 bits of `time`, applied to every record
 *   bytes 4+10*i .. 13+10*i record i: tag (1 byte), metric (1 byte),
 *                           lower 32 bits of time (4 bytes), value (4 bytes)
 *
 * `tag` and `metric` are restored as values in 0..255.
 *
 * Launch: 1D grid with at least ELEMENTS_COUNT threads in total; threads with
 * a global index >= ELEMENTS_COUNT do nothing.
 *
 * in_d  - device pointer to at least 4 + 10 * ELEMENTS_COUNT encoded bytes
 * out_d - device pointer to room for at least ELEMENTS_COUNT records
 */
__global__
void DecodeKernel(unsigned char * in_d, storeElement * out_d) {
    const int headerBytes = 4;   // shared upper half of `time`
    const int recordBytes = 10;  // tag + metric + low time + value

    // Global index so the kernel is also correct for multi-block launches.
    const int index = (int)(blockIdx.x * blockDim.x + threadIdx.x);
    // Surplus threads must not read or write past the ELEMENTS_COUNT records.
    if (index >= ELEMENTS_COUNT) {
        return;
    }

    converter c;

    // Every thread reads the shared header.
    copyBytes(c.fromBytes, in_d, headerBytes);
    const uint32_t highBits = (uint32_t)c.toInt;

    int32_t position = recordBytes * index + headerBytes;
    storeElement &element = out_d[index];
    element.tag = in_d[position++];
    element.metric = in_d[position++];

    copyBytes(c.fromBytes, in_d + position, 4);
    const uint32_t lowBits = (uint32_t)c.toInt;
    // Combine the halves with unsigned arithmetic: shifting a negative signed
    // value left is undefined before C++20.
    element.time = (int64_t)(((uint64_t)highBits << 32) | (uint64_t)lowBits);
    position += 4;

    copyBytes(c.fromBytes, in_d + position, 4);
    element.value = c.toFloat;
}
/* Prints a diagnostic and terminates the program when a CUDA call failed. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Round-trip demo: builds ELEMENTS_COUNT records on the host, encodes them on
 * the GPU, decodes the encoded bytes on the GPU and reports every record
 * whose decoded form differs from the original.
 *
 * Returns 0 after the comparison has run (mismatches are only printed) and
 * exits with EXIT_FAILURE when a host allocation or a CUDA call fails.
 */
int main(void)
{
    storeElement *input;                  // host data that will be compressed
    unsigned char *output;                // host space for data copied back from the device
    storeElement *dCompressionInput;      // device copy of the data to compress
    unsigned char *dCompressionOutput;    // device output space for compressed data
    storeElement *dDecompressionOutput;   // device output space for decompressed data

    // Prepare data and initialize memory.
    input = (storeElement*)malloc(DATA_SIZE);
    output = (unsigned char*)calloc(DATA_SIZE, sizeof(unsigned char));
    if (input == NULL || output == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(input);
        free(output);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < ELEMENTS_COUNT; i++) {
        input[i] = storeElement(i % 128 + 1, i % 64 + 1, 21474830000LL + i, (float)sin((double)i));
    }

    checkCuda(cudaMalloc((void**)&dCompressionInput, DATA_SIZE), "cudaMalloc(input)");
    checkCuda(cudaMemcpy(dCompressionInput, input, DATA_SIZE, cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");
    checkCuda(cudaMalloc((void**)&dCompressionOutput, DATA_SIZE), "cudaMalloc(compressed)");
    checkCuda(cudaMemset(dCompressionOutput, 0, DATA_SIZE), "cudaMemset(compressed)");
    checkCuda(cudaMalloc((void**)&dDecompressionOutput, DATA_SIZE), "cudaMalloc(decompressed)");
    checkCuda(cudaMemset(dDecompressionOutput, 0, DATA_SIZE), "cudaMemset(decompressed)");

    printf("RAW data:\n");
    const unsigned char *data = (const unsigned char*)input;
    for (size_t i = 0; i < DATA_SIZE; i++) {
        printf("%4d", (int)data[i]);
    }
    printf("\n");

    // Compress.
    EncodeKernel<<< 1, ELEMENTS_COUNT >>>(dCompressionInput, dCompressionOutput);
    // A launch returns no status itself; configuration errors show up here.
    checkCuda(cudaGetLastError(), "EncodeKernel launch");
    // The blocking copy also reports errors raised while the kernel ran.
    checkCuda(cudaMemcpy(output, dCompressionOutput, DATA_SIZE, cudaMemcpyDeviceToHost),
              "cudaMemcpy(compressed to host)");

    printf("Compressed data\n");
    for (size_t i = 0; i < DATA_SIZE; i++) {
        printf("%4d", (int)output[i]);
    }
    printf("\n");

    // Decompress straight from the device buffer the encoder filled.
    unsigned char *dDecompressionInput = dCompressionOutput;
    DecodeKernel<<< 1, ELEMENTS_COUNT >>>(dDecompressionInput, dDecompressionOutput);
    checkCuda(cudaGetLastError(), "DecodeKernel launch");
    checkCuda(cudaMemcpy(output, dDecompressionOutput, DATA_SIZE, cudaMemcpyDeviceToHost),
              "cudaMemcpy(decompressed to host)");

    // Check: the decoded records must equal the originals field by field.
    const storeElement *actual = (const storeElement*)output;
    for (int i = 0; i < ELEMENTS_COUNT; i++) {
        if (input[i].metric != actual[i].metric || input[i].tag != actual[i].tag ||
            input[i].time != actual[i].time || input[i].value != actual[i].value) {
            // %lld with an explicit cast prints int64_t correctly on every platform.
            printf("Element at %d should be: %d/%d/%lld/%f but is %d/%d/%lld/%f\n",
                   i, input[i].metric, input[i].tag, (long long)input[i].time, input[i].value,
                   actual[i].metric, actual[i].tag, (long long)actual[i].time, actual[i].value);
        }
    }

    // Free.
    free(input);
    free(output);
    checkCuda(cudaFree(dCompressionInput), "cudaFree(input)");
    checkCuda(cudaFree(dCompressionOutput), "cudaFree(compressed)");
    checkCuda(cudaFree(dDecompressionOutput), "cudaFree(decompressed)");
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment