Created
August 17, 2018 04:11
-
-
Save tehnerd/adfd2a5b5a2241a7a5380a2b35647e1a to your computer and use it in GitHub Desktop.
siphash cuda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
to run: | |
compile: nvcc -o ./siphash_col ./siphash_collision.cu -O3 -std=c++11 | |
run: | |
time ./siphash_col | |
... | |
hash : 12260507120865799508 collided hash 15883040871881799508 | |
mod_hash: 1799508 mod_coll_hash: 1799508 | |
index 3799705340 | |
hash : 12260507120865799508 collided hash 4453885830073799508 | |
mod_hash: 1799508 mod_coll_hash: 1799508 | |
index 1566152445 | |
hash : 12260507120865799508 collided hash 8030928623037799508 | |
mod_hash: 1799508 mod_coll_hash: 1799508 | |
num of collisions 689 | |
num of collisions 0 | |
real 0m15.143s | |
user 0m1.640s | |
sys 0m3.012s | |
or under proffile: | |
nvprof ./siphash_col | |
... | |
hash : 12260507120865799508 collided hash 4453885830073799508 | |
mod_hash: 1799508 mod_coll_hash: 1799508 | |
index 1566152445 | |
hash : 12260507120865799508 collided hash 8030928623037799508 | |
mod_hash: 1799508 mod_coll_hash: 1799508 | |
num of collisions 689 | |
num of collisions 0 | |
==21339== Profiling application: ./siphash_col | |
==21339== Profiling result: | |
Time(%) Time Calls Avg Min Max Name | |
97.22% 14.3180s 9 1.59089s 1.22229s 2.65384s calc_hash(unsigned long, unsigned long, unsigned long, unsigned long*) | |
2.60% 383.39ms 10 38.339ms 13.796ms 51.558ms [CUDA memcpy DtoH] | |
0.18% 26.358ms 1 26.358ms 26.358ms 26.358ms [CUDA memcpy HtoD] | |
==21339== API calls: | |
Time(%) Time Calls Avg Min Max Name | |
98.55% 14.7354s 11 1.33958s 14.500ms 2.69470s cudaMemcpy | |
1.42% 211.93ms 1 211.93ms 211.93ms 211.93ms cudaMalloc | |
0.03% 4.2726ms 1 4.2726ms 4.2726ms 4.2726ms cudaFree | |
0.01% 999.52us 9 111.06us 97.550us 120.94us cudaLaunch | |
0.00% 106.87us 91 1.1740us 625ns 23.489us cuDeviceGetAttribute | |
0.00% 40.619us 36 1.1280us 572ns 4.6880us cudaSetupArgument | |
0.00% 29.898us 9 3.3220us 2.8130us 4.0110us cudaConfigureCall | |
0.00% 6.3540us 1 6.3540us 6.3540us 6.3540us cuDeviceTotalMem | |
0.00% 5.9380us 3 1.9790us 1.0410us 3.3860us cuDeviceGetCount | |
0.00% 3.6460us 3 1.2150us 938ns 1.4580us cuDeviceGet | |
0.00% 1.8230us 1 1.8230us 1.8230us 1.8230us cuDeviceGetName | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cstdint> | |
#include <vector> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
/* default: SipHash-2-4 */ | |
#define cROUNDS 2 | |
#define dROUNDS 4 | |
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) | |
#define U32TO8_LE(p, v) \ | |
(p)[0] = (uint8_t)((v)); \ | |
(p)[1] = (uint8_t)((v) >> 8); \ | |
(p)[2] = (uint8_t)((v) >> 16); \ | |
(p)[3] = (uint8_t)((v) >> 24); | |
#define U64TO8_LE(p, v) \ | |
U32TO8_LE((p), (uint32_t)((v))); \ | |
U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); | |
#define U8TO64_LE(p) \ | |
(((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ | |
((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ | |
((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ | |
((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) | |
#define SIPROUND \ | |
do { \ | |
v0 += v1; \ | |
v1 = ROTL(v1, 13); \ | |
v1 ^= v0; \ | |
v0 = ROTL(v0, 32); \ | |
v2 += v3; \ | |
v3 = ROTL(v3, 16); \ | |
v3 ^= v2; \ | |
v0 += v3; \ | |
v3 = ROTL(v3, 21); \ | |
v3 ^= v0; \ | |
v2 += v1; \ | |
v1 = ROTL(v1, 17); \ | |
v1 ^= v2; \ | |
v2 = ROTL(v2, 32); \ | |
} while (0) | |
#ifdef DEBUG | |
#define TRACE \ | |
do { \ | |
printf("(%3d) v0 %08x %08x\n", (int)inlen, \ | |
(uint32_t)(v0 >> 32), (uint32_t)v0); \ | |
printf("(%3d) v1 %08x %08x\n", (int)inlen, \ | |
(uint32_t)(v1 >> 32), (uint32_t)v1); \ | |
printf("(%3d) v2 %08x %08x\n", (int)inlen, \ | |
(uint32_t)(v2 >> 32), (uint32_t)v2); \ | |
printf("(%3d) v3 %08x %08x\n", (int)inlen, \ | |
(uint32_t)(v3 >> 32), (uint32_t)v3); \ | |
} while (0) | |
#else | |
#define TRACE | |
#endif | |
__host__ __device__ int siphash(uint8_t *out, const uint8_t *in, uint64_t inlen, const uint8_t *k) | |
{ | |
/* "somepseudorandomlygeneratedbytes" */ | |
uint64_t v0 = 0x736f6d6570736575ULL; | |
uint64_t v1 = 0x646f72616e646f6dULL; | |
uint64_t v2 = 0x6c7967656e657261ULL; | |
uint64_t v3 = 0x7465646279746573ULL; | |
uint64_t b; | |
uint64_t k0 = U8TO64_LE(k); | |
uint64_t k1 = U8TO64_LE(k + 8); | |
uint64_t m; | |
int i; | |
const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); | |
const int left = inlen & 7; | |
b = ((uint64_t)inlen) << 56; | |
v3 ^= k1; | |
v2 ^= k0; | |
v1 ^= k1; | |
v0 ^= k0; | |
#ifdef DOUBLE | |
v1 ^= 0xee; | |
#endif | |
for (; in != end; in += 8) { | |
m = U8TO64_LE(in); | |
v3 ^= m; | |
TRACE; | |
for (i = 0; i < cROUNDS; ++i) | |
SIPROUND; | |
v0 ^= m; | |
} | |
switch (left) { | |
case 7: | |
b |= ((uint64_t)in[6]) << 48; | |
case 6: | |
b |= ((uint64_t)in[5]) << 40; | |
case 5: | |
b |= ((uint64_t)in[4]) << 32; | |
case 4: | |
b |= ((uint64_t)in[3]) << 24; | |
case 3: | |
b |= ((uint64_t)in[2]) << 16; | |
case 2: | |
b |= ((uint64_t)in[1]) << 8; | |
case 1: | |
b |= ((uint64_t)in[0]); | |
break; | |
case 0: | |
break; | |
} | |
v3 ^= b; | |
TRACE; | |
for (i = 0; i < cROUNDS; ++i) | |
SIPROUND; | |
v0 ^= b; | |
#ifndef DOUBLE | |
v2 ^= 0xff; | |
#else | |
v2 ^= 0xee; | |
#endif | |
TRACE; | |
for (i = 0; i < dROUNDS; ++i) | |
SIPROUND; | |
b = v0 ^ v1 ^ v2 ^ v3; | |
U64TO8_LE(out, b); | |
#ifdef DOUBLE | |
v1 ^= 0xdd; | |
TRACE; | |
for (i = 0; i < dROUNDS; ++i) | |
SIPROUND; | |
b = v0 ^ v1 ^ v2 ^ v3; | |
U64TO8_LE(out + 8, b); | |
#endif | |
return 0; | |
} | |
namespace { | |
constexpr int kTSize = 4000000; // size of hash table ; this is m in hash%m | |
} | |
__global__ void calc_hash( | |
uint64_t hash, uint64_t base, uint64_t max, uint64_t* bvec) { | |
uint64_t shash; | |
uint8_t key[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
int idx = threadIdx.x; | |
uint64_t i = base + idx; | |
int incr = 1024; | |
while (i < max) { | |
shash = 0; | |
siphash((uint8_t*)&shash, (uint8_t*)&i, sizeof(i), (uint8_t*)&key); | |
if (shash%kTSize == hash) { | |
bvec[idx] = i; | |
} | |
i += incr; | |
} | |
} | |
int main() { | |
std::vector<uint64_t> bvec(kTSize); | |
for (int i = 0; i < kTSize; i++) { | |
bvec[i] = 0; | |
} | |
uint64_t hash = 0; | |
uint64_t hash1 = 0; | |
uint64_t t = 2307; | |
uint64_t t2 = 464602104; | |
uint64_t* bvec_c; | |
uint8_t key[16] = {0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
// add_vec(a.data(), b.data(), c.data(), kSize); | |
cudaMalloc((void**)&bvec_c, kTSize * sizeof(uint64_t)); | |
cudaMemcpy(bvec_c, bvec.data(), | |
kTSize * sizeof(uint64_t), cudaMemcpyHostToDevice); | |
siphash((uint8_t*)&hash, (uint8_t*)&t, sizeof(t), (uint8_t*)&key); | |
siphash((uint8_t*)&hash1, (uint8_t*)&t, sizeof(t), (uint8_t*)&key); | |
std::cout << "hash is " << hash%kTSize << " hash1 " << hash1%kTSize << std::endl; | |
hash1 %= kTSize; | |
uint64_t base = 0; | |
uint64_t increment = 500000000; | |
uint64_t max = 0; | |
uint64_t ml = 4294967296; | |
while(max < ml) { | |
/* my jetson tx1 was crashing if it were doing more then ~500mil iterations | |
at once. hence this stupid hack w/ increment */ | |
max += increment; | |
calc_hash<<<1, 1024>>>(hash1, base, max, bvec_c); | |
base += increment; | |
/* again hack becuase of tx1 crashes. cudaMemcpy force host to blockwait till | |
all previously started kernels would be complited on gpu*/ | |
cudaMemcpy(bvec.data(), bvec_c, | |
kTSize * sizeof(uint64_t), cudaMemcpyDeviceToHost); | |
} | |
cudaMemcpy(bvec.data(), bvec_c, | |
kTSize * sizeof(uint64_t), cudaMemcpyDeviceToHost); | |
cudaFree(bvec_c); | |
std::cout << "done\n"; | |
int ctr = 0; | |
for (int i = 0; i < bvec.size(); i++) { | |
if (bvec[i] > 0) { | |
ctr++; | |
uint64_t nhash; | |
uint64_t idx = bvec[i]; | |
std::cout << "index " << idx << std::endl; | |
siphash((uint8_t*)&nhash, (uint8_t*)&idx, sizeof(idx), (uint8_t*)&key); | |
std::cout << "hash : " << hash << " collided hash " << nhash << std::endl; | |
std::cout << "mod_hash: " << hash % kTSize << " mod_coll_hash: " << nhash%kTSize | |
<< std::endl; | |
} | |
} | |
std::cout << "num of collisions " << ctr << std::endl; | |
int cntr = 0; | |
uint64_t shash; | |
/* find collisions using cpu | |
for (uint64_t i = 0; i < 4294967296; i++) { | |
shash = 0; | |
siphash((uint8_t*)&shash, (uint8_t*)&i, sizeof(i), (uint8_t*)&key); | |
if (shash%kTSize == hash1) { | |
cntr++; | |
} | |
} | |
*/ | |
std::cout << "num of collisions " << cntr << std::endl; | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment