Created
September 25, 2024 10:58
-
-
Save reyoung/00c6f9c42f258d800144d1fd0b0bd5df to your computer and use it in GitHub Desktop.
benchmark_kv_cache_swapping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import json | |
import time | |
import torch | |
def benchmark_with_warmups(fn, n_warmup: int, n_iter: int) -> float:
    """Time ``n_iter`` calls of ``fn`` after ``n_warmup`` untimed calls.

    Args:
        fn: Zero-argument callable to benchmark. Its return value is ignored.
        n_warmup: Number of calls made before the timer starts.
        n_iter: Number of calls made while the timer is running.

    Returns:
        Total elapsed seconds for the ``n_iter`` timed calls (not the
        per-call average; callers divide by ``n_iter`` themselves).
    """
    for _ in range(n_warmup):
        fn()

    # perf_counter() is monotonic and high-resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    begin = time.perf_counter()
    for _ in range(n_iter):
        fn()
    return time.perf_counter() - begin
g_dtype_map = { | |
"fp8": torch.float8_e4m3fn, | |
"bf16": torch.bfloat16, | |
} | |
class KVCacheCopySimulator: | |
def __init__(self, num_kv_heads: int, num_layers: int, head_dim: int, page_size: int, dtype: torch.dtype): | |
self._kv_cache_per_layer = torch.rand((2, page_size, num_kv_heads, head_dim), device="cuda:0").to(dtype) | |
self._kv_cache_all = torch.rand((num_layers, 2, page_size, num_kv_heads, head_dim), device="cuda:0").to(dtype) | |
self._kv_cache_per_layer_cpu = torch.empty_like(self._kv_cache_per_layer, device="cpu").pin_memory() | |
self._kv_cache_all_cpu = torch.empty_like(self._kv_cache_all, device="cpu").pin_memory() | |
self._page_size = page_size | |
self._num_layers = num_layers | |
def copy_size(self, num_pages: int) -> int: | |
return self._kv_cache_all.nbytes * num_pages | |
def benchmark_copy_layer_by_layer(self, num_pages: int): | |
for _ in range(self._num_layers * num_pages): | |
self._kv_cache_per_layer_cpu.copy_(self._kv_cache_per_layer, non_blocking=True) | |
torch.cuda.synchronize() | |
def benchmark_copy_all(self, num_pages: int): | |
for _ in range(num_pages): | |
self._kv_cache_all_cpu.copy_(self._kv_cache_per_layer, non_blocking=True) | |
torch.cuda.synchronize() | |
def benchmark_main(dtype: str, num_pages_begin: int,
                   num_pages_end: int, n_warmup: int, n_iter: int, **kwargs):
    """Run both copy benchmarks for each page count and print JSON results.

    Args:
        dtype: Key into ``g_dtype_map`` selecting the buffer element type.
        num_pages_begin: First page count to benchmark (inclusive).
        num_pages_end: Last page count to benchmark (inclusive).
        n_warmup: Untimed calls made before each measurement.
        n_iter: Timed calls per measurement; times are averaged over it.
        **kwargs: Forwarded to ``KVCacheCopySimulator`` and echoed into every
            printed result record.
    """
    simulator = KVCacheCopySimulator(dtype=g_dtype_map[dtype], **kwargs)

    def average_seconds(copy_fn, pages):
        # Mean wall time of one copy_fn(pages) call.
        total = benchmark_with_warmups(fn=lambda: copy_fn(pages),
                                       n_warmup=n_warmup, n_iter=n_iter)
        return total / n_iter

    page_count = num_pages_begin
    while page_count <= num_pages_end:
        layer_time = average_seconds(simulator.benchmark_copy_layer_by_layer, page_count)
        model_time = average_seconds(simulator.benchmark_copy_all, page_count)

        # Bytes -> GiB, dividing step by step.
        gib_copied = simulator.copy_size(page_count) / 1024 / 1024 / 1024

        record = {
            "layer_by_layer GB/s": gib_copied / layer_time,
            "model GB/s": gib_copied / model_time,
            "num_pages": page_count,
            "speed_up": layer_time / model_time,
            **kwargs,
        }
        print(json.dumps(record))
        page_count += 1
def main():
    """Parse the command-line options and hand them to ``benchmark_main``."""
    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = [
        ("--num_kv_heads", {"default": 4, "type": int}),
        ("--num_layers", {"default": 28, "type": int}),
        ("--page_size", {"default": 16, "type": int}),
        ("--head_dim", {"default": 128, "type": int}),
        ("--dtype", {"default": "fp8", "choices": list(g_dtype_map), "type": str}),
        ("--num_pages_begin", {"default": 1, "type": int}),
        ("--num_pages_end", {"default": 16, "type": int}),
        ("--n_warmup", {"default": 100, "type": int}),
        ("--n_iter", {"default": 1000, "type": int}),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    cli_args = parser.parse_args()
    # Every parsed option becomes a keyword argument of benchmark_main.
    benchmark_main(**vars(cli_args))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs the KV-cache copy benchmark once per page size and collects the JSON
# lines printed by every run into benchmark_result.jsonl (also echoed to
# stdout via tee).
#
# -x: trace commands; -e: abort on the first failing command;
# -o pipefail: make the "loop | tee" pipeline report a benchmark failure
# instead of tee's (always successful) exit status.
set -xeo pipefail

testing_page_size=(4 8 16 24 32 64 128)

for page_size in "${testing_page_size[@]}"
do
    python -u benchmark_kv_cache_copy.py --page_size "${page_size}"
done | tee benchmark_result.jsonl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"layer_by_layer GB/s": 0.9986869941081926, "model GB/s": 3.7717104875440945, "num_pages": 1, "speed_up": 3.7766692765556202, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0427370917643553, "model GB/s": 4.874810937857042, "num_pages": 2, "speed_up": 4.6750144176886, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0542193482194753, "model GB/s": 5.475523108011212, "num_pages": 3, "speed_up": 5.193912554592269, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0616535134984624, "model GB/s": 5.96728648303053, "num_pages": 4, "speed_up": 5.620747642389047, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0644298433815036, "model GB/s": 6.2193544088003865, "num_pages": 5, "speed_up": 5.8428974417352, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.070376881770741, "model GB/s": 6.436488761287202, "num_pages": 6, "speed_up": 6.0132920197596365, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0705600093947103, "model GB/s": 6.5907414373953115, "num_pages": 7, "speed_up": 6.156349368142087, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0661123517028497, "model GB/s": 6.686666965178585, "num_pages": 8, "speed_up": 6.272009656860551, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0579567161359391, "model GB/s": 6.848419275446752, "num_pages": 9, "speed_up": 6.4732509099803135, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0666092729295995, "model GB/s": 6.9141457557485415, "num_pages": 10, "speed_up": 6.482360439974257, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0681051407328224, "model GB/s": 6.954813470434987, "num_pages": 11, "speed_up": 6.511356612012296, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0682716175969136, "model GB/s": 7.057692964099764, "num_pages": 12, "speed_up": 6.606646519333825, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0706234635294065, "model GB/s": 6.563666096027427, "num_pages": 13, "speed_up": 6.130695169326582, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.061522794391057, "model GB/s": 7.165862525578085, "num_pages": 14, "speed_up": 6.750549835991779, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0650761529449355, "model GB/s": 7.194198392867466, "num_pages": 15, "speed_up": 6.754632871063264, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.0702876472709382, "model GB/s": 7.208511836521249, "num_pages": 16, "speed_up": 6.7351163539089685, "num_kv_heads": 4, "num_layers": 28, "page_size": 4, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.9420379694432464, "model GB/s": 6.640775547715751, "num_pages": 1, "speed_up": 3.4194880080637984, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 1.9962014247442088, "model GB/s": 9.029800356757738, "num_pages": 2, "speed_up": 4.523491589990628, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0247977462072706, "model GB/s": 9.676200075595313, "num_pages": 3, "speed_up": 4.77884771144189, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0417685224694235, "model GB/s": 9.536962381686052, "num_pages": 4, "speed_up": 4.670932222106913, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.055650308186949, "model GB/s": 9.858417283918016, "num_pages": 5, "speed_up": 4.795765721754974, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0582946094964107, "model GB/s": 9.799954062715331, "num_pages": 6, "speed_up": 4.761200858954307, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0669790863372417, "model GB/s": 9.644482716201255, "num_pages": 7, "speed_up": 4.665979825316767, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.063726183151668, "model GB/s": 9.688293222249555, "num_pages": 8, "speed_up": 4.694563310455194, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0695618118623567, "model GB/s": 9.706739467212671, "num_pages": 9, "speed_up": 4.6902389730885945, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.0669664844614184, "model GB/s": 9.100789209064224, "num_pages": 10, "speed_up": 4.402968929336841, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.116626643467385, "model GB/s": 9.842014497399195, "num_pages": 11, "speed_up": 4.6498585509990304, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.1118768323261716, "model GB/s": 9.827560677326597, "num_pages": 12, "speed_up": 4.653472459613954, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.1118941268021265, "model GB/s": 9.826625193718991, "num_pages": 13, "speed_up": 4.6529913924230035, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.1268789069361618, "model GB/s": 9.764344884383119, "num_pages": 14, "speed_up": 4.590926569697838, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.1346256289124876, "model GB/s": 9.70271712175827, "num_pages": 15, "speed_up": 4.545395216069547, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 2.137320453751224, "model GB/s": 9.7902565913162, "num_pages": 16, "speed_up": 4.580621765974897, "num_kv_heads": 4, "num_layers": 28, "page_size": 8, "head_dim": 128} | |
{"layer_by_layer GB/s": 3.926001875366967, "model GB/s": 10.427698574338086, "num_pages": 1, "speed_up": 2.656060517893512, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.119994850006438, "model GB/s": 11.80636766425642, "num_pages": 2, "speed_up": 2.865626801508738, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.160993932657843, "model GB/s": 13.04145319049837, "num_pages": 3, "speed_up": 3.134215863219997, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.1981444529497995, "model GB/s": 12.882656041351025, "num_pages": 4, "speed_up": 3.068654779684655, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.227073082130709, "model GB/s": 12.87117761219544, "num_pages": 5, "speed_up": 3.044938509960079, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.125342579543871, "model GB/s": 13.18697491874655, "num_pages": 6, "speed_up": 3.196576930152695, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.254232442388624, "model GB/s": 13.371295418860306, "num_pages": 7, "speed_up": 3.1430570848998376, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.288892319364516, "model GB/s": 13.346677528220646, "num_pages": 8, "speed_up": 3.1119171418596534, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.298404355972271, "model GB/s": 13.501861026603557, "num_pages": 9, "speed_up": 3.141133292367866, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.295016845717578, "model GB/s": 13.541043264842253, "num_pages": 10, "speed_up": 3.152733446981375, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.294586124295066, "model GB/s": 13.54693767872618, "num_pages": 11, "speed_up": 3.1544221693655854, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.300685601762993, "model GB/s": 13.581322145247642, "num_pages": 12, "speed_up": 3.1579435008409376, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.308165504864994, "model GB/s": 13.552041503127104, "num_pages": 13, "speed_up": 3.1456640855193387, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.285954189422843, "model GB/s": 13.577550282613071, "num_pages": 14, "speed_up": 3.167917733726747, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.3121564953461435, "model GB/s": 13.488619729816122, "num_pages": 15, "speed_up": 3.1280450383406992, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 4.300322449192017, "model GB/s": 13.65642228599326, "num_pages": 16, "speed_up": 3.175674021504864, "num_kv_heads": 4, "num_layers": 28, "page_size": 16, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.063135100995861, "model GB/s": 12.595945679984256, "num_pages": 1, "speed_up": 2.077464128733564, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.256342460269432, "model GB/s": 13.647025374937806, "num_pages": 2, "speed_up": 2.1813104799813163, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.3337619199844175, "model GB/s": 14.602029508267966, "num_pages": 3, "speed_up": 2.30542759464882, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.359386986323651, "model GB/s": 15.175958977485887, "num_pages": 4, "speed_up": 2.386387085755742, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.362919330113368, "model GB/s": 15.142621183220083, "num_pages": 5, "speed_up": 2.3798229079466084, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.374774108171947, "model GB/s": 15.378160875700829, "num_pages": 6, "speed_up": 2.4123460086197035, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.391554017904911, "model GB/s": 15.328130002354289, "num_pages": 7, "speed_up": 2.398185161138433, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.413537144472798, "model GB/s": 15.271827015879035, "num_pages": 8, "speed_up": 2.381186336316822, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.427264865640355, "model GB/s": 15.424095686519538, "num_pages": 9, "speed_up": 2.3997915145796345, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.425546135564681, "model GB/s": 15.503294738511672, "num_pages": 10, "speed_up": 2.4127590731475204, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.433399368801015, "model GB/s": 15.338772732177391, "num_pages": 11, "speed_up": 2.3842407182994547, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.334722196037029, "model GB/s": 15.540896029786717, "num_pages": 12, "speed_up": 2.4532876973688014, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.433752455187986, "model GB/s": 15.530811620908462, "num_pages": 13, "speed_up": 2.413958530279616, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.437182391624, "model GB/s": 15.446475041497624, "num_pages": 14, "speed_up": 2.3995708217925333, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.435891845092936, "model GB/s": 15.5860240678827, "num_pages": 15, "speed_up": 2.421734927035219, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 6.435320524838933, "model GB/s": 15.61066264010686, "num_pages": 16, "speed_up": 2.425778573087868, "num_kv_heads": 4, "num_layers": 28, "page_size": 24, "head_dim": 128} | |
{"layer_by_layer GB/s": 7.941291168858793, "model GB/s": 12.82078799195839, "num_pages": 1, "speed_up": 1.6144462807552247, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.182994030569784, "model GB/s": 14.296513031008356, "num_pages": 2, "speed_up": 1.7471005083958109, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.296891675097056, "model GB/s": 15.362918954601374, "num_pages": 3, "speed_up": 1.8516475273159043, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.355096253203243, "model GB/s": 15.739853251340293, "num_pages": 4, "speed_up": 1.8838625880947597, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.359175874821341, "model GB/s": 16.086233702814013, "num_pages": 5, "speed_up": 1.9243803388881107, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.240918089780939, "model GB/s": 16.222700011316057, "num_pages": 6, "speed_up": 1.9685549394590927, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.422997792186164, "model GB/s": 16.358734102105156, "num_pages": 7, "speed_up": 1.942151061381116, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.433032947711489, "model GB/s": 16.505214293773474, "num_pages": 8, "speed_up": 1.9572097483921924, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.43186322062356, "model GB/s": 16.54367641356729, "num_pages": 9, "speed_up": 1.9620427870678672, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.427878500279714, "model GB/s": 16.60528487283567, "num_pages": 10, "speed_up": 1.9702805246046862, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.453160050413631, "model GB/s": 16.66891461367093, "num_pages": 11, "speed_up": 1.9719151789697018, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.43631901243596, "model GB/s": 16.70060530374692, "num_pages": 12, "speed_up": 1.9796080825213689, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.44635138359038, "model GB/s": 16.713731432375965, "num_pages": 13, "speed_up": 1.9788108111210596, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.453626276463703, "model GB/s": 16.764299152663842, "num_pages": 14, "speed_up": 1.9830896948139791, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.281063229368199, "model GB/s": 16.791389165743443, "num_pages": 15, "speed_up": 2.0276851776948135, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 8.44950469081302, "model GB/s": 16.787101473203986, "num_pages": 16, "speed_up": 1.9867556842067051, "num_kv_heads": 4, "num_layers": 28, "page_size": 32, "head_dim": 128} | |
{"layer_by_layer GB/s": 15.375738166280199, "model GB/s": 16.559817952894157, "num_pages": 1, "speed_up": 1.0770096221598457, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 15.969949403243449, "model GB/s": 17.6529766617699, "num_pages": 2, "speed_up": 1.1053871378067506, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.202983356226778, "model GB/s": 18.2509041004275, "num_pages": 3, "speed_up": 1.1263915847579828, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.309581708603886, "model GB/s": 18.313113880844682, "num_pages": 4, "speed_up": 1.1228438722731842, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.402475748683315, "model GB/s": 18.457320279311165, "num_pages": 5, "speed_up": 1.1252764864348481, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.45478377844809, "model GB/s": 18.607412073617997, "num_pages": 6, "speed_up": 1.1308208192920375, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.49725330727581, "model GB/s": 18.602871551354024, "num_pages": 7, "speed_up": 1.1276344737429456, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.533262291971923, "model GB/s": 18.661873626858206, "num_pages": 8, "speed_up": 1.1287472065280109, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.214879323414856, "model GB/s": 18.75503675303426, "num_pages": 9, "speed_up": 1.1566559564801282, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.569188613675777, "model GB/s": 18.791916749292028, "num_pages": 10, "speed_up": 1.1341482789194437, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.585727246318708, "model GB/s": 18.804378466735322, "num_pages": 11, "speed_up": 1.1337687029014092, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.59928161637666, "model GB/s": 18.86460169868264, "num_pages": 12, "speed_up": 1.1364709711335363, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.608076903608847, "model GB/s": 18.867351488245767, "num_pages": 13, "speed_up": 1.136034689491713, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.586315133045712, "model GB/s": 18.86932234628531, "num_pages": 14, "speed_up": 1.1376440273156907, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.62500622355376, "model GB/s": 18.900577722737033, "num_pages": 15, "speed_up": 1.1368764299143128, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 16.62769068389439, "model GB/s": 18.900019149189323, "num_pages": 16, "speed_up": 1.1366592937343918, "num_kv_heads": 4, "num_layers": 28, "page_size": 64, "head_dim": 128} | |
{"layer_by_layer GB/s": 18.974232050516775, "model GB/s": 18.81643865822622, "num_pages": 1, "speed_up": 0.9916838061287305, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.3987812163615, "model GB/s": 20.061615018783275, "num_pages": 2, "speed_up": 1.0341688374660736, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.55270816010053, "model GB/s": 20.037225034418015, "num_pages": 3, "speed_up": 1.0247800391817945, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.62627122066618, "model GB/s": 20.22108346980026, "num_pages": 4, "speed_up": 1.030306941264918, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.67406594160634, "model GB/s": 20.17530107443048, "num_pages": 5, "speed_up": 1.0254769468757414, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.634187261608837, "model GB/s": 20.21995691129439, "num_pages": 6, "speed_up": 1.0298341684267687, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.720096155905722, "model GB/s": 20.281870955706772, "num_pages": 7, "speed_up": 1.0284874270064253, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.741107955225168, "model GB/s": 20.333796373042755, "num_pages": 8, "speed_up": 1.0300230574272662, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.754510884318435, "model GB/s": 20.333550026594175, "num_pages": 9, "speed_up": 1.0293117428047986, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.765363516690538, "model GB/s": 20.40982518961946, "num_pages": 10, "speed_up": 1.0326056068933274, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.7619136534101, "model GB/s": 20.435320957361405, "num_pages": 11, "speed_up": 1.0340760169162617, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.783300457380726, "model GB/s": 20.43642904669081, "num_pages": 12, "speed_up": 1.033014136883637, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.787828289879876, "model GB/s": 20.466330290603704, "num_pages": 13, "speed_up": 1.0342888563001549, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.796136407752435, "model GB/s": 20.525319066731218, "num_pages": 14, "speed_up": 1.036834594587519, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.80120707804755, "model GB/s": 20.541169646906617, "num_pages": 15, "speed_up": 1.0373695687309599, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} | |
{"layer_by_layer GB/s": 19.80191010874147, "model GB/s": 20.54510597332189, "num_pages": 16, "speed_up": 1.0375315240044614, "num_kv_heads": 4, "num_layers": 28, "page_size": 128, "head_dim": 128} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment