Last active
January 14, 2025 22:14
-
-
Save qpwo/f2e5d2a99e775f7ac54e05c6254191ae to your computer and use it in GitHub Desktop.
sglang ipv6 issue for https://github.com/sgl-project/sglang/issues/2892
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# modal run issue.py | |
import os | |
import subprocess | |
import time | |
import modal | |
import modal.experimental | |
import modal.gpu | |
import modal.volume | |
# Container image for the repro: slim Debian + PyTorch, then sglang installed
# from source with the flashinfer wheel index.
image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install("torch", "numpy<2")
    .apt_install("curl", "git", "iproute2")  # iproute2 -> `ip address show` below
    # Fix: the pasted copy had a garbled host ("github.com"); the real
    # upstream repository lives on github.com.
    .run_commands("git clone https://github.com/sgl-project/sglang.git")
    .run_commands(
        "cd sglang && pip install -e 'python[all]' --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/"
    )
)
# Modal app named after this file; the HF token is injected from the
# pre-configured "huggingface-secret" so the model weights can be pulled.
app = modal.App(
    os.path.basename(__file__),
    image=image,
    secrets=[modal.Secret.from_name("huggingface-secret", required_keys=["HF_TOKEN"])],
)
# Cluster topology: 2 nodes x 2 GPUs per node = 4 GPUs total.
n_nodes = 2
n_proc_per_node = 2
total_gpus = n_nodes * n_proc_per_node
def shellbg(cmd: str):
    """Announce *cmd* on stdout, then launch it in a detached background shell.

    The child is started via ``subprocess.Popen(shell=True)`` and is never
    waited on — output interleaves with ours and the handle is dropped.
    """
    banner = f"Running:\n\t$ {cmd}"
    print(banner, flush=True)
    subprocess.Popen(cmd, shell=True)
def mysleep(seconds):
    """Sleep roughly *seconds* seconds in 1 ms slices.

    Many short sleeps rather than one long one — a single long sleep seems
    to block stdout/network syncing in this environment.
    """
    ticks = seconds * 1000
    for _tick in range(ticks):
        time.sleep(0.001)
@app.function(
    gpu=modal.gpu.H100(count=n_proc_per_node),  # H100s per container
    timeout=3600 * 24,  # 24 hours
    cpu=64,
    memory=344_064,  # in megabytes
)  # pyright: ignore
@modal.experimental.clustered(size=n_nodes)
def fire_it_up():
    """Launch a multi-node sglang server across a Modal cluster and poll it.

    Repro for an sglang IPv6 dist-init issue: every rank starts
    ``sglang.launch_server`` pointed at rank 0's container IP (wrapped in
    ``[...]``, i.e. IPv6 literal syntax), then loops forever curling the
    local HTTP endpoint and dumping GPU stats.
    """
    cluster_info = modal.experimental.get_cluster_info()
    node_rank = cluster_info.rank
    main_addr = cluster_info.container_ips[0]  # rank 0 acts as rendezvous host
    my_addr = cluster_info.container_ips[node_rank]
    num_nodes = len(cluster_info.container_ips)
    task_id = os.environ["MODAL_TASK_ID"]
    print(f"{cluster_info=}")
    print(f"hello from {node_rank=}, {main_addr=}, {my_addr=}, {num_nodes=}, {task_id=}")
    shellbg("ip address show")  # log this container's network interfaces
    # Tensor-parallel degree spans every GPU in the cluster.
    tp = num_nodes * n_proc_per_node
    print("\n\n\n")
    mysleep(5)
    # Environment dump only on the lead node.
    if node_rank == 0:
        shellbg("python3 -m sglang.check_env")
        mysleep(15)
        print("\n\n\n")
    # NOTE(review): original indentation was lost in the paste; the server
    # launch is placed at function level (every rank) because it passes
    # --node-rank — confirm against the original gist.
    shellbg(
        f"""
export GLOO_SOCKET_IFNAME='eth1' MASTER_ADDR={main_addr} MASTER_PORT=1234;
echo GLOO_SOCKET_IFNAME=$GLOO_SOCKET_IFNAME and MASTER_ADDR=$MASTER_ADDR and MASTER_PORT=$MASTER_PORT;
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B \
    --tp-size {tp} --nnodes {num_nodes} --node-rank {node_rank} \
    --port 2242 --dist-init-addr [{main_addr}]:1234 \
    --disable-cuda-graph --trust-remote-code
"""
    )
    # Poll the local server "forever" (also keeps the container alive);
    # every third iteration, dump GPU power/memory stats.
    for i in range(10000000):
        shellbg("curl http://localhost:2242/v1/models")
        if i % 3 == 1:
            shellbg("nvidia-smi --query-gpu=power.draw,memory.used,memory.free --format=csv")
        mysleep(10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment