Created
November 8, 2022 23:37
-
-
Save andfoy/7244295739c7636ed0e16ac87ca9d458 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import tqdm | |
import cupy as cp | |
import numpy as np | |
from cupy import testing | |
from cupyx.profiler import benchmark | |
# Element types exercised by the benchmark.
dtypes = [
    cp.uint8,
    cp.int32,
    cp.int64,
    cp.float32,
    cp.float64,
    cp.complex64,
    cp.complex128,
]

# Square 2-D input shapes, one (n, n) tuple per edge length.
sizes = [(n, n) for n in (3, 5, 10, 100, 500, 1000, 5000, 10000)]
# Result table indexed as times[implementation][dtype][size].
# Every cell starts as a dict of None placeholders; a fresh dict is built for
# each (dtype, size) pair so cells never share state.
times = {
    'gpu': {
        d: {
            s: dict.fromkeys(('cpu_mean', 'cpu_std', 'gpu_mean', 'gpu_std'))
            for s in sizes
        }
        for d in dtypes
    },
    'cpu': {
        d: {
            s: dict.fromkeys(
                ('mean', 'std', 'cpu_mean', 'cpu_std', 'gpu_mean', 'gpu_std'))
            for s in sizes
        }
        for d in dtypes
    },
}
def gather_time(prof):
    """Summarise a profiling result as mean/std per timer.

    Args:
        prof: Object exposing ``cpu_times`` and ``gpu_times`` arrays (anything
            with ``.mean()`` and ``.std()`` methods works).

    Returns:
        dict with keys ``cpu_mean``, ``cpu_std``, ``gpu_mean`` and
        ``gpu_std``. Each value is the corresponding statistic multiplied by
        1000 (seconds to milliseconds, assuming the profiler reports seconds).
    """
    scale = 1000
    summary = {}
    for timer in ('cpu', 'gpu'):
        samples = getattr(prof, f'{timer}_times')
        summary[f'{timer}_mean'] = samples.mean() * scale
        summary[f'{timer}_std'] = samples.std() * scale
    return summary
def call_cpu(ar, axis=0):
    """Return the unique slices of ``ar`` along ``axis``, deduplicating on the host.

    The array is flattened to 2-D (one row per slice along ``axis``), copied
    to host memory, and ``np.unique(..., axis=0, return_index=True)`` picks one
    index per distinct row. Those indices are then used to gather the
    matching slices from the device array.

    Args:
        ar: CuPy array to deduplicate.
        axis: Axis whose slices are compared. Defaults to 0.

    Returns:
        CuPy array holding the selected slices, with ``axis`` restored to its
        original position.
    """
    # Bring the target axis to the front so each slice is one leading entry.
    moved = cp.moveaxis(ar, axis, 0)
    full_shape = moved.shape
    n_slices, trailing = full_shape[0], full_shape[1:]

    # Collapse every slice into a single row and transfer it to the host.
    rows = moved.reshape(n_slices, math.prod(trailing))
    host_rows = cp.asnumpy(cp.ascontiguousarray(rows))

    # Only the indices are needed; the unique values themselves are discarded.
    _, keep = np.unique(host_rows, return_index=True, axis=0)

    selected = cp.take(moved, keep, 0)
    selected = selected.reshape(keep.shape[0], *trailing)
    return cp.moveaxis(selected, 0, axis)
# Implementations under test, keyed by a short identifier.
funcs = dict(gpu=cp.unique, cpu=call_cpu)

# Column titles used in the Markdown report, keyed like `funcs`.
headers = dict(gpu='CuPy-only', cpu='NumPy call')
# Time every implementation on a random array for each (dtype, size) pair and
# store the summarised timings in `times`.
#
# Both callables are invoked with an explicit `axis=0`. Without it,
# `cp.unique` falls back to its default `axis=None` (unique over the flattened
# array) while `call_cpu` defaults to `axis=0` (unique rows), so the two
# columns of the report would be measuring different operations.
for dtype in dtypes:
    print(dtype)
    for size in sizes:
        x = testing.shaped_random(size, dtype=dtype)
        for comp_id in tqdm.tqdm(funcs):
            func = funcs[comp_id]
            prof = benchmark(func, (x,), kwargs={'axis': 0}, n_repeat=100)
            times[comp_id][dtype][size] = gather_time(prof)
# Render a single Markdown table: one row per (dtype, size), one timing column
# per implementation. Each timing cell is the larger of the mean CPU and mean
# GPU time recorded for that run.
lines = [
    f'| Size | `dtype` | {headers["gpu"]} (ms) | {headers["cpu"]} (ms) |',
    '|:----:|:-------:|:-------------:|:-------------:|',
]
for dtype in dtypes:
    dtype_name = dtype.__name__
    for size in sizes:
        size_str = 'x'.join(map(str, size))
        cells = []
        for kind in funcs:
            result = times[kind][dtype][size]
            cpu_time, gpu_time = result['cpu_mean'], result['gpu_mean']
            if cpu_time is None:
                # No measurement was recorded for this implementation.
                continue
            # NOTE(review): `3f` means minimum width 3 with the default six
            # decimals; `.3f` may have been intended - confirm before changing.
            cells.append(f'{max(cpu_time, gpu_time):3f}')
        # Emit the row only when both timing columns could be filled.
        if len(cells) == 2:
            lines.append(
                ' | '.join([f'| {size_str} | `{dtype_name}`', *cells]))
print('\n'.join(lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment