Skip to content

Instantly share code, notes, and snippets.

View louchenyao's full-sized avatar

Chenyao Lou louchenyao

View GitHub Profile
@louchenyao
louchenyao / bench_seq.cu
Last active July 31, 2020 01:49
Benchmark of GPU sequential read performance
// nvcc bench_seq.cu -O3 -gencode arch=compute_70,code=sm_70
// Result on V100:
// bench_seq max throughput: 769.856 GiB/s
// bench_seq_unroll max throughput: 833.067 GiB/s
#include <iostream>
__device__
int64_t reduce(int64_t *buf, uint64_t s) {
// A more efficient approach would be to reduce within the warp first, but this is not the bottleneck
CREATE TABLE date_ (
d_datekey INTEGER,
d_date TEXT ENCODING DICT(32),
d_dayofweek TEXT ENCODING DICT(8),
d_month TEXT ENCODING DICT(8),
d_year SMALLINT,
d_yearmonthnum INTEGER,
d_yearmonth TEXT ENCODING DICT(16),
d_daynuminweek SMALLINT,
d_daynuminmonth SMALLINT,
#include <algorithm>
#include <cstdio>
#include <cstdlib>
// Compare-exchange helper: when `a` compares greater than `b`, the two
// values are swapped in place; otherwise both are left untouched.
// Only `operator>` on T and std::swap are used.
template <typename T>
void cmp(T &a, T &b) {
    const bool already_ordered = !(a > b);
    if (already_ordered) {
        return;  // nothing to exchange
    }
    std::swap(a, b);
}
#include <cassert>
#include <cstdio>
#include <cmath>
#include <random>
int zipf(double alpha, int n, std::mt19937_64 &rng)
{
static bool first = true; // Static first time flag
static double c = 0; // Normalization constant
static double *sum_probs; // Pre-calculated sum of probabilities
// This is a C++/CUDA port of the Rust Zipf distribution generator at https://github.com/jonhoo/rust-zipf/blob/master/src/lib.rs.
#include <cassert>
#include <cstdio>
#include <cmath>
#include <curand_kernel.h>
namespace Zipf {
struct ZipfDist {
@louchenyao
louchenyao / two_or_three.cpp
Created January 7, 2021 04:10
two_or_three.cpp
// clang++ two_or_three.cpp -O3 --std=c++17
// results:
// two : 130.3 ns
// two+ : 131.0 ns
// three: 133.2 ns
#include <stdlib.h>
#include <stdio.h>
#include <string.h>