Skip to content

Instantly share code, notes, and snippets.

@nerdalert
Created April 14, 2025 15:12
Show Gist options
  • Save nerdalert/b3a8e03b710c400d3c9a65fb1a2c0520 to your computer and use it in GitHub Desktop.
Save nerdalert/b3a8e03b710c400d3c9a65fb1a2c0520 to your computer and use it in GitHub Desktop.
{"date": "20250411-002323", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 120, "framework": "vllm", "request_rate": 1.0, "burstiness": 1.0, "max_concurrency": null, "duration": 99.19297069497406, "completed": 120, "total_input_tokens": 120000, "total_output_tokens": 12000, "request_throughput": 1.2097631430861078, "request_goodput:": null, "output_throughput": 120.97631430861077, "total_token_throughput": 1330.7394573947186, "mean_ttft_ms": 56.25359537589247, "median_ttft_ms": 55.28098650393076, "std_ttft_ms": 6.545660891106274, "p99_ttft_ms": 78.70767521642848, "mean_tpot_ms": 7.615017463035274, "median_tpot_ms": 7.524641732229014, "std_tpot_ms": 0.5137661324558762, "p99_tpot_ms": 8.92187871250578, "mean_itl_ms": 7.615019605385871, "median_itl_ms": 7.299988501472399, "std_itl_ms": 3.706068885790247, "p99_itl_ms": 8.394360903184861}
{"date": "20250411-002536", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 1200, "framework": "vllm", "request_rate": 10.0, "burstiness": 1.0, "max_concurrency": null, "duration": 124.90926466300152, "completed": 1200, "total_input_tokens": 1200000, "total_output_tokens": 120000, "request_throughput": 9.606973535850488, "request_goodput:": null, "output_throughput": 960.6973535850486, "total_token_throughput": 10567.670889435536, "mean_ttft_ms": 97.18879668534404, "median_ttft_ms": 71.89276549615897, "std_ttft_ms": 48.370498677534286, "p99_ttft_ms": 262.48354728886625, "mean_tpot_ms": 15.743290549374189, "median_tpot_ms": 14.898846939563601, "std_tpot_ms": 3.8875364284412925, "p99_tpot_ms": 27.5725588313953, "mean_itl_ms": 15.743292532256373, "median_itl_ms": 9.145072996034287, "std_itl_ms": 17.471115365749835, "p99_itl_ms": 87.39375397766703}
{"date": "20250411-002855", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 2400, "framework": "vllm", "request_rate": 20.0, "burstiness": 1.0, "max_concurrency": null, "duration": 191.32800173101714, "completed": 2400, "total_input_tokens": 2400000, "total_output_tokens": 240000, "request_throughput": 12.543903549330405, "request_goodput:": null, "output_throughput": 1254.3903549330407, "total_token_throughput": 13798.293904263446, "mean_ttft_ms": 31944.706274405202, "median_ttft_ms": 32123.88337799348, "std_ttft_ms": 19786.66656640827, "p99_ttft_ms": 65442.828196196344, "mean_tpot_ms": 137.94325337278767, "median_tpot_ms": 143.67024642926867, "std_tpot_ms": 20.18264430040262, "p99_tpot_ms": 146.54887980553957, "mean_itl_ms": 137.94325505949024, "median_itl_ms": 143.593716507894, "std_itl_ms": 25.967784584420162, "p99_itl_ms": 150.21148629748495}
{"date": "20250411-003219", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 3600, "framework": "vllm", "request_rate": 30.0, "burstiness": 1.0, "max_concurrency": null, "duration": 194.6617933170055, "completed": 2444, "total_input_tokens": 2444000, "total_output_tokens": 244400, "request_throughput": 12.555108829291228, "request_goodput:": null, "output_throughput": 1255.5108829291228, "total_token_throughput": 13810.61971222035, "mean_ttft_ms": 44535.6725091412, "median_ttft_ms": 51875.03292899055, "std_ttft_ms": 23484.892499548056, "p99_ttft_ms": 67499.76363433264, "mean_tpot_ms": 138.8218586441113, "median_tpot_ms": 143.8858456109448, "std_tpot_ms": 19.015820922716763, "p99_tpot_ms": 145.10669101358596, "mean_itl_ms": 138.82186016718964, "median_itl_ms": 143.8373330165632, "std_itl_ms": 24.35863255784664, "p99_itl_ms": 150.29555146320493}
{"date": "20250411-003544", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 4200, "framework": "vllm", "request_rate": 35.0, "burstiness": 1.0, "max_concurrency": null, "duration": 195.47171885397984, "completed": 2451, "total_input_tokens": 2451000, "total_output_tokens": 245100, "request_throughput": 12.538898283443919, "request_goodput:": null, "output_throughput": 1253.8898283443918, "total_token_throughput": 13792.78811178831, "mean_ttft_ms": 47756.22113712596, "median_ttft_ms": 59723.150503006764, "std_ttft_ms": 23089.71232309665, "p99_ttft_ms": 67582.63804699527, "mean_tpot_ms": 138.93561278326976, "median_tpot_ms": 143.9408895355588, "std_tpot_ms": 18.951985615987752, "p99_tpot_ms": 145.31749173233902, "mean_itl_ms": 138.9356144690003, "median_itl_ms": 143.91992698074318, "std_itl_ms": 24.118204286045607, "p99_itl_ms": 150.02653499250297}
{"date": "20250411-003715", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 2000, "framework": "vllm", "request_rate": "inf", "burstiness": 1.0, "max_concurrency": null, "duration": 81.99138311398565, "completed": 1017, "total_input_tokens": 1017000, "total_output_tokens": 101700, "request_throughput": 12.403742458962443, "request_goodput:": null, "output_throughput": 1240.3742458962442, "total_token_throughput": 13644.116704858687, "mean_ttft_ms": 39409.09476958674, "median_ttft_ms": 39223.32845898927, "std_ttft_ms": 23225.22038251866, "p99_ttft_ms": 79105.34516730579, "mean_tpot_ms": 131.42046224284934, "median_tpot_ms": 143.60463093952137, "std_tpot_ms": 27.885022410759404, "p99_tpot_ms": 145.25056487758354, "mean_itl_ms": 131.42046376862046, "median_itl_ms": 143.58903098036535, "std_itl_ms": 35.02843368497749, "p99_itl_ms": 147.61838480946608}
{"date": "20250411-004654", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 120, "framework": "sgl", "request_rate": 1.0, "burstiness": 1.0, "max_concurrency": null, "duration": 99.1142401729885, "completed": 120, "total_input_tokens": 120000, "total_output_tokens": 12000, "request_throughput": 1.2107241077625037, "request_goodput:": null, "output_throughput": 121.07241077625036, "total_token_throughput": 1331.796518538754, "mean_ttft_ms": 63.73290097447655, "median_ttft_ms": 62.761708002653904, "std_ttft_ms": 8.589265830490163, "p99_ttft_ms": 98.36812816996827, "mean_tpot_ms": 6.90009509210966, "median_tpot_ms": 6.909049005098311, "std_tpot_ms": 0.3760839155111436, "p99_tpot_ms": 8.051085497077432, "mean_itl_ms": 6.900097185615799, "median_itl_ms": 6.528085999889299, "std_itl_ms": 3.5678507805184205, "p99_itl_ms": 17.946764903608706}
{"date": "20250411-004907", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 1200, "framework": "sgl", "request_rate": 10.0, "burstiness": 1.0, "max_concurrency": null, "duration": 124.966012889985, "completed": 1200, "total_input_tokens": 1200000, "total_output_tokens": 120000, "request_throughput": 9.602610919950141, "request_goodput:": null, "output_throughput": 960.2610919950141, "total_token_throughput": 10562.872011945155, "mean_ttft_ms": 102.13567216269439, "median_ttft_ms": 83.97346000128891, "std_ttft_ms": 41.42952749399564, "p99_ttft_ms": 234.77725491626185, "mean_tpot_ms": 18.819966751925858, "median_tpot_ms": 18.41107035344443, "std_tpot_ms": 4.306976384132259, "p99_tpot_ms": 30.879455748395735, "mean_itl_ms": 18.819969065923157, "median_itl_ms": 10.247556492686272, "std_itl_ms": 33.18313281877262, "p99_itl_ms": 149.16702270886162}
{"date": "20250411-005130", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 2400, "framework": "sgl", "request_rate": 20.0, "burstiness": 1.0, "max_concurrency": null, "duration": 134.73752000799868, "completed": 2400, "total_input_tokens": 2400000, "total_output_tokens": 240000, "request_throughput": 17.812410380252835, "request_goodput:": null, "output_throughput": 1781.2410380252836, "total_token_throughput": 19593.65141827812, "mean_ttft_ms": 3487.787896569074, "median_ttft_ms": 3772.7501944900723, "std_ttft_ms": 2302.5272388740054, "p99_ttft_ms": 7663.017748006968, "mean_tpot_ms": 239.54532595758104, "median_tpot_ms": 262.767112035289, "std_tpot_ms": 55.58324671021421, "p99_tpot_ms": 275.7347514347886, "mean_itl_ms": 239.5362551407147, "median_itl_ms": 65.80720501369797, "std_itl_ms": 535.4798936626746, "p99_itl_ms": 2774.827092794003}
{"date": "20250411-005424", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 3600, "framework": "sgl", "request_rate": 30.0, "burstiness": 1.0, "max_concurrency": null, "duration": 164.49131512999884, "completed": 2944, "total_input_tokens": 2944000, "total_output_tokens": 294400, "request_throughput": 17.897601448887027, "request_goodput:": null, "output_throughput": 1789.7601448887028, "total_token_throughput": 19687.36159377573, "mean_ttft_ms": 21103.159615806628, "median_ttft_ms": 28528.328813015833, "std_ttft_ms": 11517.835200921436, "p99_ttft_ms": 38517.40393859742, "mean_tpot_ms": 177.86527329399132, "median_tpot_ms": 176.26630416671475, "std_tpot_ms": 66.15430209863439, "p99_tpot_ms": 275.45120436823726, "mean_itl_ms": 177.84330862224107, "median_itl_ms": 64.41603100392967, "std_itl_ms": 1179.9955174955817, "p99_itl_ms": 626.2124724953902}
{"date": "20250411-005718", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 4200, "framework": "sgl", "request_rate": 35.0, "burstiness": 1.0, "max_concurrency": null, "duration": 164.06633793900255, "completed": 2943, "total_input_tokens": 2943000, "total_output_tokens": 294300, "request_throughput": 17.937866091057412, "request_goodput:": null, "output_throughput": 1793.7866091057413, "total_token_throughput": 19731.65270016315, "mean_ttft_ms": 24054.060236568323, "median_ttft_ms": 29412.183659995208, "std_ttft_ms": 11140.512836846103, "p99_ttft_ms": 40453.83363115543, "mean_tpot_ms": 177.58326685426, "median_tpot_ms": 177.0814276768384, "std_tpot_ms": 65.8325788443248, "p99_tpot_ms": 274.8172715901705, "mean_itl_ms": 177.5613299877699, "median_itl_ms": 64.60740198963322, "std_itl_ms": 1182.1737121959447, "p99_itl_ms": 546.424897762578}
{"date": "20250411-005825", "backend": "vllm", "model_id": "meta-llama/Llama-3.2-1B", "tokenizer_id": "meta-llama/Llama-3.2-1B", "num_prompts": 2000, "framework": "sgl", "request_rate": "inf", "burstiness": 1.0, "max_concurrency": null, "duration": 58.914670080994256, "completed": 1017, "total_input_tokens": 1017000, "total_output_tokens": 101700, "request_throughput": 17.262254012487155, "request_goodput:": null, "output_throughput": 1726.2254012487153, "total_token_throughput": 18988.47941373587, "mean_ttft_ms": 26155.85801325088, "median_ttft_ms": 28878.533768001944, "std_ttft_ms": 15183.86088481392, "p99_ttft_ms": 55832.12032968877, "mean_tpot_ms": 167.21348154231913, "median_tpot_ms": 166.91630618203422, "std_tpot_ms": 73.57734516085918, "p99_tpot_ms": 456.4224050889463, "mean_itl_ms": 167.171974413147, "median_itl_ms": 64.41349801025353, "std_itl_ms": 1180.0214703083666, "p99_itl_ms": 817.3837910234577}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment