Cuiqing Li (李崔卿) tiandiao123

  • Shanghai, China
import argparse
import os
import time
import torch
from _utils import print_perf_stats
from transformers import LlamaForCausalLM, LlamaTokenizer
import colossalai
from colossalai.inference.tensor_parallel.engine import TPInferEngine
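The preview above shows only the imports of what looks like a ColossalAI tensor-parallel LLaMA inference benchmark. For orientation, here is a minimal, hedged sketch of the Hugging Face side of such a timing loop, reusing the imports above; the checkpoint path, prompt, and generation length are illustrative, and the TPInferEngine wiring plus the local _utils.print_perf_stats helper are left out because their exact interfaces are not visible in the preview.

# Hedged sketch, not the gist body: load the HF LLaMA checkpoint and time generation.
model_path = "/path/to/llama-7b"  # illustrative path
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda")

inputs = tokenizer("Hello, my name is", return_tensors="pt").to("cuda")
torch.cuda.synchronize()
start = time.time()
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128)
torch.cuda.synchronize()
elapsed = time.time() - start
new_tokens = output_ids.shape[-1] - inputs["input_ids"].shape[-1]
print(f"{new_tokens} new tokens in {elapsed:.2f}s")
# A TPInferEngine(...) wrapper and a print_perf_stats(...) call would slot in here;
# their signatures are not shown in the preview, so they are omitted.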
import os
import warnings
import time
import torch
import torch.distributed as dist
import argparse
from packaging import version
import colossalai
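These imports (argparse, torch.distributed, colossalai) look like the header of a distributed launcher for one of the benchmarks above; the rest of the file is not shown. A minimal, hedged sketch of such an entry point follows; the argument names are invented for illustration, and plain torch.distributed initialization is used where a ColossalAI script would typically call its own launch helper.

# Hedged sketch of a distributed benchmark entry point (not the gist body).
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default="/path/to/llama-7b")  # illustrative
    parser.add_argument("--max_out_len", type=int, default=128)
    args = parser.parse_args()

    # Join the process group set up by torchrun; a ColossalAI script would
    # normally call colossalai's launch helper at this point instead.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

    if dist.get_rank() == 0:
        print(f"benchmarking {args.model_path} on {dist.get_world_size()} GPUs")

if __name__ == "__main__":
    main()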
tiandiao123 / vllm_benchmark.py
Last active October 17, 2023 03:45
from vllm import LLM, SamplingParams
import torch
from torch import distributed as dist
import time
from tqdm import tqdm
import numpy as np
# Create an LLM.
llm = LLM(
    model="/home/lclcq/share/llama-7b",
)
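The preview is cut off inside the LLM(...) call. A hedged sketch of how such a vLLM throughput benchmark typically continues is shown below; the sampling settings, prompt, and batch size are illustrative, not taken from the gist.

# Hedged continuation sketch (not the original gist body).
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256)
prompts = ["Hello, my name is"] * 64  # illustrative batch

start = time.time()
outputs = llm.generate(prompts, sampling_params)
elapsed = time.time() - start

generated = sum(len(out.outputs[0].token_ids) for out in outputs)
print(f"{generated} tokens in {elapsed:.2f}s -> {generated / elapsed:.1f} tokens/s")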
import inspect
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import PIL.Image
import torch
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
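This preview is the import header of a customized SDXL image-to-image pipeline built on diffusers' StableDiffusionXLImg2ImgPipeline. For orientation, here is a hedged usage sketch of the stock pipeline those imports extend; the checkpoint id, prompt, strength, and file names are illustrative.

# Hedged usage sketch of the stock diffusers pipeline (not the custom gist code).
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",  # illustrative checkpoint
    torch_dtype=torch.float16,
).to("cuda")

init_image = PIL.Image.open("input.png").convert("RGB").resize((1024, 1024))
result = pipe(
    prompt="a photo of an astronaut riding a horse",
    image=init_image,
    strength=0.3,  # how far to move away from the input image
).images[0]
result.save("output.png")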
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include <torch/types.h>
#include <c10/util/Half.h>
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm_splitk_parallel.h"