Skip to content

Instantly share code, notes, and snippets.

@andfoy
Created November 8, 2022 23:37
Show Gist options
  • Save andfoy/7244295739c7636ed0e16ac87ca9d458 to your computer and use it in GitHub Desktop.
Save andfoy/7244295739c7636ed0e16ac87ca9d458 to your computer and use it in GitHub Desktop.
import math
import tqdm
import cupy as cp
import numpy as np
from cupy import testing
from cupyx.profiler import benchmark
# Element types and square input shapes covered by the benchmark grid.
dtypes = [
    cp.uint8,
    cp.int32,
    cp.int64,
    cp.float32,
    cp.float64,
    cp.complex64,
    cp.complex128,
]
sizes = [(side, side) for side in (3, 5, 10, 100, 500, 1000, 5000, 10000)]

# Keys of the per-measurement placeholder dicts, per implementation id.
# The 'cpu' placeholder additionally carries 'mean'/'std'; they are kept so
# the structure is unchanged, although nothing visible in this script reads
# or writes them.
_TIMING_KEYS = ('cpu_mean', 'cpu_std', 'gpu_mean', 'gpu_std')
_PLACEHOLDER_KEYS = {
    'gpu': _TIMING_KEYS,
    'cpu': ('mean', 'std') + _TIMING_KEYS,
}

# times[implementation][dtype][size] -> dict of timing statistics.
# Every statistic starts out as None; dict.fromkeys is called once per cell
# so each (implementation, dtype, size) gets its own independent dict.
times = {
    kind: {
        dtype: {size: dict.fromkeys(keys) for size in sizes}
        for dtype in dtypes
    }
    for kind, keys in _PLACEHOLDER_KEYS.items()
}
def gather_time(prof, unit_scale=1000):
    """Summarise a benchmark result as scaled mean/std timings.

    Args:
        prof: Object exposing ``cpu_times`` and ``gpu_times`` attributes,
            each providing ``mean()`` and ``std()`` (e.g. the result of
            ``cupyx.profiler.benchmark``).
        unit_scale: Factor applied to every statistic. The default of 1000
            reproduces the original behaviour; presumably the raw samples
            are in seconds, making the output milliseconds -- confirm
            against the cupyx.profiler documentation.

    Returns:
        dict with keys ``'cpu_mean'``, ``'cpu_std'``, ``'gpu_mean'`` and
        ``'gpu_std'`` (in that order), each statistic multiplied by
        ``unit_scale``.
    """
    summary = {}
    # Same treatment for both timers; the key prefix doubles as the
    # attribute prefix on ``prof``.
    for device in ('cpu', 'gpu'):
        samples = getattr(prof, f'{device}_times')
        summary[f'{device}_mean'] = samples.mean() * unit_scale
        summary[f'{device}_std'] = samples.std() * unit_scale
    return summary
def call_cpu(ar, axis=0):
    """Return the unique slices of ``ar`` along ``axis`` via a NumPy round trip.

    The data is copied to the host, ``np.unique`` finds the unique rows,
    and the matching slices are then gathered from the CuPy array.

    Args:
        ar: CuPy array to deduplicate.
        axis: Axis along which slices are compared (default 0).

    Returns:
        CuPy array holding one slice per unique entry, with the deduplicated
        dimension placed back at position ``axis``.
    """
    # Put the axis of interest first so that every candidate slice is a "row".
    moved = cp.moveaxis(ar, axis, 0)
    n_slices = moved.shape[0]
    trailing = moved.shape[1:]

    # Flatten each slice into a 1-D row and copy the 2-D table to host memory.
    flat = moved.reshape(n_slices, math.prod(trailing))
    host_rows = cp.asnumpy(cp.ascontiguousarray(flat))

    # Only the index output of np.unique is needed: it tells which rows of
    # the original table to keep.
    keep = np.unique(host_rows, return_index=True, axis=0)[1]

    # Gather the surviving slices from the CuPy array and restore the layout.
    picked = cp.take(moved, keep, 0)
    picked = picked.reshape(keep.shape[0], *trailing)
    return cp.moveaxis(picked, 0, axis)
# Implementations under comparison, keyed by the same short ids that index
# ``times`` and ``headers``.
funcs = dict(gpu=cp.unique, cpu=call_cpu)

# Column captions used for each implementation in the printed table.
headers = dict(gpu='CuPy-only', cpu='NumPy call')
# Time every implementation on every (dtype, size) combination and store the
# summarised statistics in ``times``.
for dtype in dtypes:
    print(dtype)
    for size in sizes:
        # Build the input once per configuration so that all implementations
        # are timed on the very same array.
        x = testing.shaped_random(size, dtype=dtype)
        for comp_id, func in tqdm.tqdm(funcs.items()):
            # NOTE(review): only ``x`` is passed, so cp.unique runs without an
            # ``axis`` argument while call_cpu uses its default axis=0 --
            # confirm both paths are meant to compute the same thing.
            times[comp_id][dtype][size] = gather_time(
                benchmark(func, (x,), n_repeat=100))
# Render a single Markdown table with one row per (dtype, size) and one
# timing column per implementation.
# (A previously commented-out variant that printed one table per
# implementation, with a single max(CPU, GPU) time column, was dropped.)
table = [
    f'| Size | `dtype` | {headers["gpu"]} (ms) | {headers["cpu"]} (ms) |',
    '|:----:|:-------:|:-------------:|:-------------:|',
]
for dtype in dtypes:
    dtype_label = dtype.__name__
    for size in sizes:
        shape_label = 'x'.join(map(str, size))
        cells = []
        for kind in funcs:
            measured = times[kind][dtype][size]
            cpu_ms, gpu_ms = measured['cpu_mean'], measured['gpu_mean']
            if cpu_ms is None:
                # No measurement was recorded for this implementation.
                continue
            # Report the slower of the two timers for this run.
            # NOTE(review): ``3f`` is a minimum width of 3 with the default
            # six decimals; if three decimals were intended it should be
            # ``.3f``. Kept as-is to preserve the output.
            cells.append(f'{max(cpu_ms, gpu_ms):3f}')
        # Emit the row only when both implementation columns are filled.
        if len(cells) == 2:
            row_head = f'| {shape_label} | `{dtype_label}`'
            table.append(' | '.join([row_head, *cells]))
print('\n'.join(table))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment