Last active
January 3, 2016 05:29
-
-
Save janisz/8416524 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
/* Launch geometry used by main(): BLOCKS blocks of THREADS threads, one thread per element. */
#define BLOCKS 10
#define THREADS 100
#define ELEMENTS_COUNT (BLOCKS*THREADS)
/* Size in bytes of the uncompressed storeElement array. */
#define DATA_SIZE (ELEMENTS_COUNT * sizeof(storeElement))
/* Bytes per packed record: 1 (tag) + 1 (metric) + 4 (low half of time) + 4 (value). */
#define COMPRESSED_ELEMENT_SIZE 10
/* Bytes of the stream header, which holds the upper 32 bits of time written once. */
#define COMPRESSED_HEADER_SIZE 4
/* Total size in bytes of the packed stream: header followed by one record per element. */
#define COMPRESSED_DATA_SIZE (ELEMENTS_COUNT*COMPRESSED_ELEMENT_SIZE+COMPRESSED_HEADER_SIZE)
/**
 * One uncompressed sample: a tag id, a metric id, a 64-bit timestamp and a
 * float value. This is the element type consumed by EncodeKernel and
 * produced by DecodeKernel.
 */
struct storeElement
{
    int32_t tag;
    int32_t metric;
    int64_t time;
    float value;

    storeElement(int32_t _tag, int32_t _metric, int64_t _time, float _value)
        : tag(_tag), metric(_metric), time(_time), value(_value) {}

    /* Field-by-field equality; `value` is compared exactly (no tolerance). */
    bool operator==(const storeElement & rhs) const {
        return metric == rhs.metric && tag == rhs.tag && time == rhs.time && value == rhs.value;
    }

    bool operator!=(const storeElement & rhs) const {
        return !(*this == rhs);
    }

    /* Writes "metric/tag/time/value" to stdout without a trailing newline. */
    void print() const {
        // int64_t is not `long` on every platform; widen explicitly so the
        // format specifier always matches the argument type.
        printf("%d/%d/%lld/%f", metric, tag, (long long)time, value);
    }
};
/*
 * Scratch union used to move a 32-bit integer or a float to and from its raw
 * bytes. All members share the same storage; the to and from name pairs are
 * aliases of each other that exist only to make call sites read naturally.
 */
union converter {
    int32_t toInt;
    int32_t fromInt;
    float toFloat;
    float fromFloat;
    unsigned char toBytes[4];
    unsigned char fromBytes[4];
};
/*
 * Device-side byte copy: transfers `size` bytes from `source` to `dest`,
 * lowest address first. Does nothing when `size` is zero or negative.
 */
__device__
void copyBytes(unsigned char * dest, const unsigned char * source, const int size) {
    unsigned char * writeCursor = dest;
    const unsigned char * readCursor = source;
    for (int remaining = size; remaining > 0; --remaining) {
        *writeCursor++ = *readCursor++;
    }
}
/**
 * Packs ELEMENTS_COUNT storeElements into a byte stream, one element per thread.
 *
 * Stream layout (multi-byte fields are stored in the device's in-memory byte order):
 *   bytes [0, 4)               upper 32 bits of element 0's time, written by thread 0
 *   bytes 4 + 10*i .. +10      record i: tag (1 byte), metric (1 byte),
 *                              lower 32 bits of time (4 bytes), value (4 bytes)
 *
 * The format is lossy unless every element shares the same upper 32 bits of
 * `time` and every tag/metric fits in 8 bits: only the low byte of tag and
 * metric is stored, and only element 0 contributes the high half of time.
 *
 * Expects a 1D launch. Threads whose global index is >= ELEMENTS_COUNT exit
 * without touching memory.
 *
 * @param in_d   device array of at least ELEMENTS_COUNT elements
 * @param out_d  device buffer of at least COMPRESSED_DATA_SIZE bytes
 */
__global__
void EncodeKernel(storeElement * in_d, unsigned char * out_d) {
    const int headerSize = 4;

    // Derive the index from the real launch shape so the kernel is not tied to
    // the THREADS macro, and guard the tail so oversized grids stay in bounds.
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= ELEMENTS_COUNT) {
        return;
    }

    const storeElement & element = in_d[index];
    const int32_t low = (int32_t)(uint32_t)(element.time & 0xFFFFFFFF);
    const int32_t high = (int32_t)(uint32_t)((element.time >> 32) & 0xFFFFFFFF);

    converter c;
    if (index == 0) {
        // Single shared header: the high half of time is stored once for the whole stream.
        c.fromInt = high;
        copyBytes(out_d, c.toBytes, headerSize);
    }

    unsigned char * record = out_d + headerSize + COMPRESSED_ELEMENT_SIZE * index;
    record[0] = (unsigned char)element.tag;
    record[1] = (unsigned char)element.metric;

    c.fromInt = low;
    copyBytes(record + 2, c.toBytes, 4);

    c.fromFloat = element.value;
    copyBytes(record + 6, c.toBytes, 4);
}
/**
 * Inverse of EncodeKernel: rebuilds ELEMENTS_COUNT storeElements from the
 * packed byte stream, one element per thread.
 *
 * Every thread reads the 4-byte header (upper 32 bits of time) and then its
 * own 10-byte record at offset 4 + 10*index: tag (1 byte), metric (1 byte),
 * lower 32 bits of time (4 bytes), value (4 bytes). Tag and metric are
 * widened from a single unsigned byte.
 *
 * Expects a 1D launch. Threads whose global index is >= ELEMENTS_COUNT exit
 * without touching memory.
 *
 * @param in_d   device buffer of at least COMPRESSED_DATA_SIZE bytes
 * @param out_d  device array of at least ELEMENTS_COUNT elements
 */
__global__
void DecodeKernel(unsigned char * in_d, storeElement * out_d) {
    const int headerSize = 4;

    // Derive the index from the real launch shape so the kernel is not tied to
    // the THREADS macro, and guard the tail so oversized grids stay in bounds.
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= ELEMENTS_COUNT) {
        return;
    }

    converter c;
    copyBytes(c.fromBytes, in_d, headerSize);
    // Keep the halves unsigned while shifting: left-shifting a negative signed
    // value is not well defined in older C++ standards.
    const uint64_t high = (uint32_t)c.toInt;

    const unsigned char * record = in_d + headerSize + COMPRESSED_ELEMENT_SIZE * index;
    out_d[index].tag = record[0];
    out_d[index].metric = record[1];

    copyBytes(c.fromBytes, record + 2, 4);
    const uint64_t low = (uint32_t)c.toInt;
    out_d[index].time = (int64_t)((high << 32) | low);

    copyBytes(c.fromBytes, record + 6, 4);
    out_d[index].value = c.toFloat;
}
/* Prints a diagnostic and terminates the process when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char * what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Round-trip self-test: builds ELEMENTS_COUNT sample elements, compresses them
 * with EncodeKernel, decompresses with DecodeKernel and compares the result
 * with the input.
 *
 * @return 0 and prints "OK" when every element survives the round trip,
 *         1 when any element differs or a host allocation fails. A CUDA
 *         failure terminates the process with EXIT_FAILURE.
 */
int main(void)
{
    storeElement *input;                 //data that will be compressed
    unsigned char *output;               //space for data to copy from device
    storeElement *dCompessionInput;      //device pointer to data that will be compressed
    unsigned char *dCompressionOutput;   //output space for compressed data
    storeElement *dDecompressionOutput;  //output space for decompressed data

    //prepare data
    input = (storeElement*)malloc(DATA_SIZE);
    if (input == NULL) {
        fprintf(stderr, "Host allocation of input buffer failed\n");
        return 1;
    }
    for (int i=0;i<ELEMENTS_COUNT;i++) {
        input[i] = storeElement(i%128+1, i%64+1, 21474830000 + i , (float)sin((double)i));
    }

    //allocate memory
    // The same host buffer receives first the compressed stream and then the
    // decompressed elements, so it must be large enough for either.
    const size_t outputBytes = DATA_SIZE > COMPRESSED_DATA_SIZE ? DATA_SIZE : COMPRESSED_DATA_SIZE;
    output = (unsigned char*)calloc(outputBytes, sizeof(unsigned char));
    if (output == NULL) {
        fprintf(stderr, "Host allocation of output buffer failed\n");
        free(input);
        return 1;
    }
    checkCuda(cudaMalloc((void**) &dCompessionInput, DATA_SIZE), "cudaMalloc(compression input)");
    checkCuda(cudaMemcpy(dCompessionInput, input, DATA_SIZE, cudaMemcpyHostToDevice), "copy input to device");
    checkCuda(cudaMalloc((void**) &dCompressionOutput, COMPRESSED_DATA_SIZE), "cudaMalloc(compression output)");
    checkCuda(cudaMemset(dCompressionOutput, 0, COMPRESSED_DATA_SIZE), "clear compression output");
    checkCuda(cudaMalloc((void**) &dDecompressionOutput, DATA_SIZE), "cudaMalloc(decompression output)");
    checkCuda(cudaMemset(dDecompressionOutput, 0, DATA_SIZE), "clear decompression output");

    //compress
    EncodeKernel<<< BLOCKS, THREADS >>>(dCompessionInput, dCompressionOutput);
    checkCuda(cudaGetLastError(), "EncodeKernel launch");        // bad launch configuration
    checkCuda(cudaDeviceSynchronize(), "EncodeKernel execution"); // faults raised while running
    checkCuda(cudaMemcpy(output, dCompressionOutput, COMPRESSED_DATA_SIZE, cudaMemcpyDeviceToHost), "copy compressed data to host");

    //decompress
    unsigned char *dDecompessionInput = dCompressionOutput;
    DecodeKernel<<< BLOCKS, THREADS >>>(dDecompessionInput , dDecompressionOutput);
    checkCuda(cudaGetLastError(), "DecodeKernel launch");
    checkCuda(cudaDeviceSynchronize(), "DecodeKernel execution");
    checkCuda(cudaMemcpy(output, dDecompressionOutput, DATA_SIZE, cudaMemcpyDeviceToHost), "copy decompressed data to host");

    //check
    int ret = 0;
    storeElement *actual = (storeElement*)output;
    for (int i=0;i<ELEMENTS_COUNT;i++) {
        if (input[i] != actual[i]) {
            printf("Element at %d should be: ", i);
            input[i].print();
            printf(" but is: ");
            actual[i].print();
            printf("\n");
            ret = 1;
        }
    }
    if (ret == 0) printf("OK\n");

    //free
    free(input);
    free(output);
    checkCuda(cudaFree(dCompessionInput), "cudaFree(compression input)");
    checkCuda(cudaFree(dCompressionOutput), "cudaFree(compression output)");
    checkCuda(cudaFree(dDecompressionOutput), "cudaFree(decompression output)");
    return ret;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment