Last active
December 7, 2020 07:12
-
-
Save louchenyao/bdd08cf8b1d4525d076296b1567b4dd2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a C++ CUDA port of the Zipf Distribution generator from https://github.com/jonhoo/rust-zipf/blob/master/src/lib.rs, written in Rust. | |
#include <cassert> | |
#include <cstdio> | |
#include <cmath> | |
#include <curand_kernel.h> | |
namespace Zipf { | |
struct ZipfDist { | |
double num_elements; | |
double exponent; | |
double h_integral_x1; | |
double h_integral_num_elements; | |
double s; | |
}; | |
__host__ __device__ | |
double helper1(double x) { | |
if (abs(x) > 1e-8) { | |
return log1p(x) / x; | |
} | |
return 1.0 - x * (0.5 - x * (1.0 / 3.0 - 0.25 * x)); | |
} | |
__host__ __device__ | |
double helper2(double x) { | |
if (abs(x) > 1e-8) { | |
return expm1(x) / x; | |
} | |
return 1.0 + x * 0.5 * (1.0 + x * 1.0 / 3.0 * (1.0 + 0.25 * x)); | |
} | |
__host__ __device__ | |
double h_integral(double x, double exponent) { | |
double log_x = log(x); | |
return helper2((1.0 - exponent) * log_x) * log_x; | |
} | |
__host__ __device__ | |
double h(double x, double exponent) { | |
return exp(-exponent * log(x)); | |
} | |
__host__ __device__ | |
double h_integral_inv(double x, double exponent) { | |
double t = x * (1.0 - exponent); | |
if (t < -1.0) { | |
t = -1.0; | |
} | |
return exp(helper1(t) * x); | |
} | |
void init_zipf_dist(int num_elements, double exponent, ZipfDist &z) { | |
assert(num_elements > 0); | |
assert(exponent > 0); | |
z.num_elements = num_elements; | |
z.exponent = exponent; | |
z.h_integral_x1 = h_integral(1.5, exponent) - 1.0; | |
z.h_integral_num_elements = h_integral(num_elements + 0.5, exponent); | |
z.s = 2.0 - h_integral_inv(h_integral(2.5, exponent) - h(2, exponent), exponent); | |
} | |
__device__ int nxt(ZipfDist zipf, curandState_t *s) { | |
double hnum = zipf.h_integral_num_elements; | |
while (true) { | |
double u = hnum + curand_uniform_double(s) * (zipf.h_integral_x1 - hnum); | |
double x = h_integral_inv(u, zipf.exponent); | |
double k64 = x > zipf.num_elements ? zipf.num_elements : (x < 1 ? 1 : x); | |
int k = int(k64) < 1 ? 1 : int(k64); | |
if (k64 - x <= zipf.s | |
|| u >= h_integral(k64 + 0.5, zipf.exponent) - h(k64, zipf.exponent)) { | |
return k; | |
} | |
} | |
} | |
__global__ void gen(unsigned long long seed, ZipfDist zipf, int N, int *output) { | |
const size_t total_threads = gridDim.x * blockDim.x; | |
const size_t elements_per_thread = (N + total_threads - 1) / total_threads; | |
const size_t tid = blockIdx.x*blockDim.x+threadIdx.x; | |
if (tid >= N) return; | |
curandState_t s; | |
curand(&s); | |
curand_init(seed, tid, 0, &s); | |
for (int i = tid*elements_per_thread; i < (tid+1)*elements_per_thread && i < N; i++) { | |
output[i] = nxt(zipf, &s); | |
} | |
} | |
bool test(double alpha) { | |
constexpr int N = 100; | |
int samples = pow(2.0, alpha) * 5000000; | |
// printf("samples = %d\n", samples); | |
Zipf::ZipfDist zipf; | |
Zipf::init_zipf_dist(N, alpha, zipf); | |
int *output; | |
cudaMallocManaged((void **)&output, samples * sizeof(int)); | |
Zipf::gen<<<40, 1024>>>(12345, zipf, samples, output); | |
cudaDeviceSynchronize(); | |
int buckets[N] = {0}; | |
for (int i = 0; i < samples; i++) { | |
buckets[output[i]-1] += 1; | |
} | |
cudaFree(output); | |
double harmonic = 0; | |
for (int i = 1; i <= N; i++) { | |
harmonic += 1.0 / pow(i, alpha); | |
} | |
for (int i = 0; i < N; i++) { | |
double freq = double(buckets[i]) / samples; | |
double expected = (1.0 / pow(i+1, alpha)) / harmonic; | |
// printf("i = %d, freq = %lf, expected = %lf\n", i + 1, freq, expected); | |
double off_by = abs(expected - freq); | |
if (off_by >= 0.1) return false; | |
bool good = false; | |
if (i == 0) { | |
if (alpha < 1) { | |
good = freq > expected && off_by < 0.5 * expected; | |
} else { | |
good = freq > expected && off_by < 0.3 * expected; | |
} | |
} else if (i == N - 1) { | |
good = off_by < expected; | |
} else { | |
good = off_by < 0.5 * expected; | |
} | |
if (!good) return false; | |
} | |
return true; | |
} | |
} // namespace Zipf | |
int main() { | |
double alphas[] = {0.5, 0.75, 1.0, 1.25}; | |
for (double a: alphas) { | |
printf("alpha = %lf: ", a); | |
if (Zipf::test(a)) { | |
printf("Pass\n"); | |
} else { | |
printf("Fail\n"); | |
return -1; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment