#include <iostream>
#include <cuda_runtime.h>
#include <cuda.h>

// Define the kernel with illegal memory access
__global__ void illegalWildPointerKernel(int* data, int size) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    __nanosleep(1000000000ULL); // Sleep ~1 second (capped at an implementation-defined maximum)
    int* wild_pointer = (int*)0x100;
    if (idx == 0) {
        // The gist preview is truncated here; a plausible continuation dereferences
        // the wild pointer so the kernel faults with an illegal memory access.
        data[0] = *wild_pointer;
    }
}

import torch
import torch.distributed as dist

use_nccl = False
dist.init_process_group(backend="nccl" if use_nccl else "gloo")
rank = dist.get_rank()
torch.cuda.set_device(rank % 8)
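
The preview stops here; as a minimal follow-up sketch (assumed, not part of the original gist), an all_reduce can sanity-check the freshly created process group on either backend:

# Assumed continuation, not from the original gist.
x = torch.ones(1, device="cuda" if use_nccl else "cpu")
dist.all_reduce(x)  # default reduce op is SUM across all ranks
print(f"rank {rank}: {x.item()} (expected {dist.get_world_size()})")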

import torch.distributed as dist
import torch
import time

dist.init_process_group(backend="nccl")
rank = dist.get_rank()
torch.cuda.set_device(rank)
N_warmup = 10
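
The gist is cut off after N_warmup; a hedged sketch of the usual pattern such a script follows (warmup iterations, then a timed collective), where the iteration count and payload size are assumptions:

# Assumed continuation: time all_reduce after a warmup phase.
N_iters = 100
x = torch.randn(4 * 1024 * 1024, device="cuda")  # ~16 MiB of fp32 per rank (assumed size)
for _ in range(N_warmup):
    dist.all_reduce(x)
torch.cuda.synchronize()
start = time.time()
for _ in range(N_iters):
    dist.all_reduce(x)
torch.cuda.synchronize()
print(f"rank {rank}: {(time.time() - start) / N_iters * 1e3:.3f} ms per all_reduce")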

import torch
import torch.nn as nn

# an Embedding module containing 10 tensors of size 3
embedding = nn.Embedding(10, 3)
embedding.weight.requires_grad_(False)
# a batch of 4 indices
input = torch.LongTensor([1, 2, 4, 5])
output = embedding(input)
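
For reference (not shown in the truncated preview): the lookup returns one embedding row per index, and because the weight was frozen no gradient is tracked through the result.

print(output.shape)          # torch.Size([4, 3])
print(output.requires_grad)  # False, since embedding.weight does not require grad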

import os
from typing import List

# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
import torch.distributed as dist

dist.init_process_group(backend="gloo")
rank = local_rank = dist.get_rank()
world_size = dist.get_world_size()
torch.cuda.set_device(local_rank)
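
The rest of this gist is cut off. Given the commented-out PYTORCH_CUDA_ALLOC_CONF line, a plausible (assumed) continuation exercises the CUDA caching allocator and prints its statistics; the allocation sizes below are made up:

# Assumed continuation: allocate and free some CUDA memory, then inspect allocator stats.
tensors: List[torch.Tensor] = [
    torch.empty(256 * 1024 * 1024 // 4, device="cuda")  # 256 MiB of fp32 each
    for _ in range(4)
]
del tensors
print(f"rank {rank}/{world_size}: "
      f"allocated={torch.cuda.memory_allocated() / 2**20:.1f} MiB, "
      f"reserved={torch.cuda.memory_reserved() / 2**20:.1f} MiB")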

import os
from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from torch import nn

def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                    out: torch.Tensor) -> None:
    out.copy_(q)

import os
from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from torch import nn

def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    out = q.clone()
    out += k

import os
from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from torch import nn

def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                    out: torch.Tensor) -> None:
    out.copy_(q)
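
All three previews above stop inside silly_attention. As a non-authoritative sketch, an out-variant like this can be registered as a PyTorch custom op so it stays opaque to torch.compile; the op name, completed body, and example shapes below are assumptions on top of the truncated gists (requires PyTorch >= 2.4 for torch.library.custom_op):

# Hedged sketch; "silly::attention" is a made-up namespace/op name, and the
# body past out.copy_(q) is an assumed completion.
@torch.library.custom_op("silly::attention", mutates_args=["out"])
def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                    out: torch.Tensor) -> None:
    # Placeholder "attention" that just mixes the inputs into the output buffer.
    out.copy_(q)
    out += k
    out += v

# Usage: the caller preallocates the output buffer.
q, k, v = (torch.randn(2, 8) for _ in range(3))
out = torch.empty_like(q)
torch.ops.silly.attention(q, k, v, out)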

import openai
import asyncio

async def get_choice_completion(prompt, choices):
    # Initialize an asynchronous OpenAI client
    async with openai.AsyncClient(base_url="http://127.0.0.1:8000/v1", api_key="abc") as client:
        choice_probs = {}
        # Calculate logprobs for each prompt + choice sequence
        for choice in choices:
            # Truncated in the original preview; this assumed continuation scores
            # prompt + choice via echoed prompt logprobs (the model name is a guess).
            resp = await client.completions.create(
                model="meta-llama/Meta-Llama-3-8B", prompt=prompt + choice,
                max_tokens=1, echo=True, logprobs=1)
            lps = resp.choices[0].logprobs.token_logprobs
            choice_probs[choice] = sum(lp for lp in lps[:-1] if lp is not None)
        return choice_probs

unified benchmark script
$ python benchmarks/benchmark_serving.py --model meta-llama/Meta-Llama-3-8B --dataset-name random --random-input-len 256 --random-output-len 256 --num-prompts 100

vLLM default
$ vllm serve meta-llama/Meta-Llama-3-8B --disable-log-requests --max-num-seqs 1

============ Serving Benchmark Result ============
Successful requests:                     100
Benchmark duration (s):                  198.86