Lianmin Zheng (merrymercy)
"""
Copyright 2023-2024 SGLang Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

model: InternLM-7B

python3 benchmark_serving.py --backend openai --host 127.0.0.1 --port 30000 --dataset ShareGPT_V3_unfiltered_cleaned_split.json --model internlm/internlm2-chat-7b --tokenizer internlm/internlm2-chat-7b --num-prompts 3000 --trust-remote-code

H100 w/o streaming

SGLang

Successful requests: 3000
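The benchmark command above assumes an SGLang server with an OpenAI-compatible endpoint is already listening on 127.0.0.1:30000. A typical way to start one (an assumption, not part of the original notes; flags may differ across SGLang versions):

python3 -m sglang.launch_server --model-path internlm/internlm2-chat-7b --port 30000 --trust-remote-code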
# mpirun -np 2 python p2p-nonblocking.py
import cupy as cp
import cupy.cuda.nccl as nccl
from mpi4py import MPI
import time
import os
import jax.numpy as jnp

nbytes = 1024 * 1024 * 32
data_type = cp.float32
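The gist preview cuts off after the imports. Below is a minimal sketch of how such a point-to-point benchmark might continue, using only CuPy's NCCL bindings and mpi4py; the rank bootstrap, warmup, iteration count, and timing loop are assumptions, not the original p2p-nonblocking.py, and the imports are repeated so the sketch is self-contained.

import cupy as cp
import cupy.cuda.nccl as nccl
from mpi4py import MPI
import time

nbytes = 1024 * 1024 * 32
data_type = cp.float32

mpi_comm = MPI.COMM_WORLD
rank = mpi_comm.Get_rank()
world_size = mpi_comm.Get_size()
assert world_size == 2, "run with: mpirun -np 2 python p2p-nonblocking.py"

# One GPU per MPI rank.
cp.cuda.Device(rank).use()

# Rank 0 creates the NCCL unique id and shares it over MPI.
uid = nccl.get_unique_id() if rank == 0 else None
uid = mpi_comm.bcast(uid, root=0)
comm = nccl.NcclCommunicator(world_size, uid, rank)

count = nbytes // cp.dtype(data_type).itemsize
buf = cp.arange(count, dtype=data_type) if rank == 0 else cp.empty(count, dtype=data_type)
stream = cp.cuda.Stream.null

def p2p_once():
    # NCCL send/recv are enqueued on the stream, so the host side does not block
    # until the explicit synchronize below.
    if rank == 0:
        comm.send(buf.data.ptr, count, nccl.NCCL_FLOAT32, 1, stream.ptr)
    else:
        comm.recv(buf.data.ptr, count, nccl.NCCL_FLOAT32, 0, stream.ptr)

# Warmup, then time a one-way transfer from rank 0 to rank 1.
for _ in range(5):
    p2p_once()
stream.synchronize()

n_iters = 20
tic = time.time()
for _ in range(n_iters):
    p2p_once()
stream.synchronize()
toc = time.time()

if rank == 0:
    avg_s = (toc - tic) / n_iters
    print(f"size {nbytes / 1e6:.1f} MB, avg {avg_s * 1e3:.3f} ms, "
          f"bandwidth {nbytes / avg_s / 1e9:.2f} GB/s")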
@merrymercy
merrymercy / redirect.py
Created March 21, 2023 00:57
Permanently redirect to another URL.
from fastapi import FastAPI
from starlette.responses import RedirectResponse

app = FastAPI()


@app.get("/")
async def redirect():
    response = RedirectResponse(url="https://alpa-projects.github.io/opt", status_code=301)
    return response
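To serve the redirect (an assumed invocation, not part of the gist), save the snippet as redirect.py and run it under any ASGI server, for example:

uvicorn redirect:app --host 0.0.0.0 --port 8000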
@merrymercy
merrymercy / test_pad.py
Last active August 21, 2022 18:24
test padding in the middle
"""Use huggingface/transformers interface and Alpa backend for distributed inference."""
from transformers import AutoTokenizer
from opt_serving.model.wrapper import get_model
import numpy as np
import torch
# Load the tokenizer. We have to use the 30B version because
# other versions have some issues. The 30B version works for all OPT models.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-30b", use_fast=False)
tokenizer.add_bos_token = False
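The preview stops at the tokenizer setup. Given the gist title, one plausible continuation is building a batch whose pad tokens sit in the middle of a sequence rather than at either end. The sketch below shows only that batch construction with the tokenizer and torch; the prompts, the split point, and the final call into the model returned by get_model are assumptions, since get_model's arguments are not visible in the preview.

prompts = ["Paris is the capital city of", "Computer science is the study of"]
ids = [tokenizer(p).input_ids for p in prompts]
max_len = max(len(x) for x in ids)
pad_id = tokenizer.pad_token_id

padded, masks = [], []
for x in ids:
    n_pad = max_len - len(x)
    split = len(x) // 2
    # Insert the pad tokens in the middle of the sequence instead of at either end.
    padded.append(x[:split] + [pad_id] * n_pad + x[split:])
    masks.append([1] * split + [0] * n_pad + [1] * (len(x) - split))

input_ids = torch.tensor(padded)
attention_mask = torch.tensor(masks)
# These tensors would then be fed to the model returned by get_model(...)
# to check that mid-sequence padding is masked out correctly.
print(input_ids)
print(attention_mask)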