Skip to content

Instantly share code, notes, and snippets.

[{"Test name": "serving_meta-llama-Llama-3.3-70B-Instruct_tp2_pp2_sharegpt_qps_01", "GPU": "1xStandard_NC48ads_A100_v4 x 2", "# of req.": 200, "Tput (req/s)": 0.8723546086118796, "Output Tput (tok/s)": 186.62718319338248, "Total Tput (tok/s)": 372.6960594372533, "Mean TTFT (ms)": 183.92802053997002, "Median TTFT (ms)": 149.6454604985047, "P99 TTFT (ms)": 433.51827928232507, "Mean TPOT (ms)": 68.63183632389541, "Median TPOT (ms)": 68.07398973492188, "P99 TPOT (ms)": 91.97054489733009, "Mean ITL (ms)": 68.4992397737343, "Median ITL (ms)": 63.92631100243307, "P99 ITL (ms)": 224.9206623390637}, {"Test name": "serving_meta-llama-Llama-3.3-70B-Instruct_tp2_pp2_sharegpt_qps_04", "GPU": "1xStandard_NC48ads_A100_v4 x 2", "# of req.": 200, "Tput (req/s)": 2.0098153410901136, "Output Tput (tok/s)": 427.7389499675034, "Total Tput (tok/s)": 856.4225131453192, "Mean TTFT (ms)": 250.33063353503167, "Median TTFT (ms)": 210.2748959987366, "P99 TTFT (ms)": 595.3828498008077, "Mean TPOT (ms)": 104.60007571868582, "Median TPOT (
Running over qps list 1
~/vllm/benchmarks /vllm-workspace
Running test case serving_llama70B_tp2_pp2_sharegpt with qps 1
Client command: python3 benchmark_serving.py --save-result --base-url http://llama-3-3-70b-instruct-leader.default:8000 --result-dir /root/results/ --result-filename serving_llama70B_tp2_pp2_sharegpt_qps_1.json --request-rate 1 --model=meta-llama/Llama-3.3-70B-Instruct --backend=vllm --dataset-name=sharegpt --dataset-path=/root/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts=200
Namespace(backend='vllm', base_url='http://llama-3-3-70b-instruct-leader.default:8000', host='127.0.0.1', port=8000, endpoint='/v1/completions', dataset=None, dataset_name='sharegpt', dataset_path='/root/ShareGPT_V3_unfiltered_cleaned_split.json', max_concurrency=None, model='meta-llama/Llama-3.3-70B-Instruct', tokenizer=None, best_of=1, use_beam_search=False, num_prompts=200, logprobs=None, request_rate=1.0, burstiness=1.0, seed
@surajssd
surajssd / list-vm-sizes.go
Last active July 11, 2023 13:41
Using Azure's user managed identity for the Kubernetes workloads
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.
package main
import (
"context"
"fmt"
"log"
"os"
==========================================================================
0. Building a qcow image, instructions at this link
==========================================================================
https://github.com/AMDESE/AMDSEV
==========================================================================
1. Building the host and guest kernels: follow the instructions at this link
and build the 5.19-rc6 kernel
==========================================================================
https://github.com/AMDESE/AMDSEV/tree/sev-snp-devel
@surajssd
surajssd / ipxe-ubuntu-20
Created June 1, 2022 08:03
Boot Ubuntu on Packet Using iPXE
#!ipxe
# Boot the Ubuntu "focal" amd64 netboot installer, fetching the kernel and
# initrd over HTTP from archive.ubuntu.com.

# Configure interface net0 via DHCP.
dhcp net0

# Directory that holds the installer's kernel and initrd. No trailing slash:
# the paths below supply their own "/" separator, which avoids requesting
# URLs that contain "//".
set base-url http://archive.ubuntu.com/ubuntu/dists/focal/main/installer-amd64/current/legacy-images/netboot/ubuntu-installer/amd64

# Load the installer kernel; console=ttyS1,115200n8 is passed through on the
# kernel command line.
kernel ${base-url}/linux console=ttyS1,115200n8
# Load the installer initrd.
initrd ${base-url}/initrd.gz
# Start the loaded kernel.
boot
apiVersion: v1
kind: Namespace
metadata:
labels:
cluster.x-k8s.io/provider: control-plane-kubeadm
control-plane: controller-manager
name: capi-kubeadm-control-plane-system
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
apiVersion: v1
kind: Namespace
metadata:
labels:
cluster.x-k8s.io/provider: bootstrap-kubeadm
control-plane: controller-manager
name: capi-kubeadm-bootstrap-system
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
apiVersion: v1
kind: Namespace
metadata:
labels:
cluster.x-k8s.io/provider: cluster-api
control-plane: controller-manager
name: capi-system
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
@surajssd
surajssd / delete-volumes.sh
Created May 25, 2021 10:29
Delete All Unused AWS EBS Volumes
#!/bin/bash
# Delete all unused volumes from AWS.
#
# For every region returned by `aws ec2 describe-regions`, list the EBS volumes
# whose status is "available" and delete each one.
#
# WARNING: destructive. Every matching volume is deleted without confirmation.
# A failed delete is reported by the aws CLI and the loop moves on to the next
# volume.

# Select region names by key with --query instead of by column position, so the
# result does not depend on the column layout of the CLI's text output.
for region in $(aws ec2 describe-regions --region us-east-1 \
    --query 'Regions[].RegionName' --output text); do
  echo "Region: $region"

  # --query/--output are given explicitly so the listing does not depend on the
  # output format configured in the user's AWS CLI profile.
  for vol in $(aws ec2 describe-volumes --region "$region" \
      --filters "Name=status,Values=available" \
      --query 'Volumes[].VolumeId' --output text); do
    echo "Volume: $vol"
    aws ec2 delete-volume --region "$region" --volume-id "$vol"
  done
done
#!/bin/bash
set -euo pipefail
set -x
# Source: https://docs.docker.com/engine/install/ubuntu/
apt-get update
apt-get -y remove docker docker-engine docker.io containerd runc || true
apt-get install -y \