Cuiqing Li (李崔卿) tiandiao123
Shanghai, China
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm_splitk_parallel.h"
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include <torch/types.h>
#include <c10/util/Half.h>
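The C++/CUDA headers above appear to be the preamble of FP16 GEMM snippets that exercise cuBLAS and CUTLASS split-K kernels alongside PyTorch. As a point of comparison, a minimal PyTorch-side FP16 GEMM baseline might look like the sketch below; the problem size is an assumption, not taken from the gists.

import torch

# Assumed square problem size, for illustration only.
M, N, K = 4096, 4096, 4096
a = torch.randn(M, K, dtype=torch.half, device="cuda")
b = torch.randn(K, N, dtype=torch.half, device="cuda")
c = a @ b  # dispatches to a cuBLAS FP16 GEMM under the hood
torch.cuda.synchronize()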
import inspect
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import PIL.Image
import torch
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
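These imports set up a custom SDXL img2img pipeline. Continuing from the imports above, a hedged sketch of the usual entry point follows; the checkpoint name and input file are assumptions, not taken from the gist.

pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",  # assumed checkpoint
    torch_dtype=torch.float16,
).to("cuda")
init_image = PIL.Image.open("input.png").convert("RGB")  # assumed input
image = pipe(prompt="a photo", image=init_image, strength=0.3).images[0]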
tiandiao123 / vllm_benchmark.py (last active October 17, 2023)
from vllm import LLM, SamplingParams
import torch
from torch import distributed as dist
import time
from tqdm import tqdm
import numpy as np
# Create an LLM. The preview truncates the call; it is closed here
# so the snippet parses, with no extra arguments invented.
llm = LLM(
    model="/home/lclcq/share/llama-7b",
)
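Continuing the snippet, a minimal sketch of the throughput loop such a benchmark would run; the prompts, sampling settings, and batch size are assumptions.

sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128)
prompts = ["Hello, my name is"] * 8  # assumed batch
start = time.time()
outputs = llm.generate(prompts, sampling_params)
elapsed = time.time() - start
total_tokens = sum(len(out.outputs[0].token_ids) for out in outputs)
print(f"throughput: {total_tokens / elapsed:.1f} tokens/s")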
import os
import warnings
import time
import torch
import torch.distributed as dist
import argparse
from packaging import version
import colossalai
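These imports point to a torch.distributed launch wrapped by Colossal-AI. A one-line bootstrap sketch, assuming the script is started via torchrun; the empty config dict was the conventional placeholder in 2023-era Colossal-AI.

colossalai.launch_from_torch(config={})  # assumed torchrun launch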
import argparse
import os
import time
import torch
from _utils import print_perf_stats
from transformers import LlamaForCausalLM, LlamaTokenizer
import colossalai
from colossalai.inference.tensor_parallel.engine import TPInferEngine
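A hedged sketch of the load-and-time pattern these imports suggest; the model path is reused from the vLLM snippet above and the generation length is an assumption. print_perf_stats lives in the gist's local _utils module, so plain wall-clock timing is shown instead.

tokenizer = LlamaTokenizer.from_pretrained("/home/lclcq/share/llama-7b")
model = LlamaForCausalLM.from_pretrained("/home/lclcq/share/llama-7b").half().cuda()
inputs = tokenizer("Hello, my name is", return_tensors="pt").to("cuda")
start = time.time()
out = model.generate(**inputs, max_new_tokens=128)  # assumed length
new_tokens = out.shape[-1] - inputs["input_ids"].shape[-1]
print(f"latency: {time.time() - start:.2f}s for {new_tokens} new tokens")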