# Configure your NGC API key first; log in with username $oauthtoken and your NGC key as the password
docker login nvcr.io
docker pull nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3
docker run -it --gpus all --name "ui-tars" --net host nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3
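# Inside the container, nvidia-smi should list every GPU passed through by --gpus all.
# The download step below assumes huggingface-cli is available in this image; installing
# huggingface_hub[cli] is harmless if it already is.
nvidia-smi
pip install -U "huggingface_hub[cli]"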
huggingface-cli download bytedance-research/UI-TARS-7B-DPO --local-dir UI-TARS-7B-DPO
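# Start vLLM's OpenAI-compatible API server (it listens on port 8000 by default).
# --served-model-name sets the model name clients must request;
# --limit-mm-per-prompt allows up to 5 images per prompt.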
python3 -m vllm.entrypoints.openai.api_server --served-model-name ui-tars \
--model UI-TARS-7B-DPO \
--limit-mm-per-prompt image=5
# In another terminal, open a second shell in the same container:
docker exec -it ui-tars bash
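# Optional sanity check: once the weights have finished loading, the server should
# list the served model name ("ui-tars") here.
curl http://localhost:8000/v1/models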
pip install gradio-tunneling
# Expose the local API port (8000) to a public URL
gradio-tun 8000
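# gradio-tun prints a public https://xxxx.gradio.live address. As an optional
# end-to-end check, an OpenAI-style request through the tunnel should return a
# completion (the URL below is the example used in the config section; substitute
# the address printed for your session):
curl https://d3bd34ef6463c5e82e.gradio.live/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer empty" \
  -d '{"model": "ui-tars", "messages": [{"role": "user", "content": "hello"}]}'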
# Install the Midscene Chrome extension and configure it as follows, using the public
# gradio.live URL printed by gradio-tun (with /v1 appended) as the base URL:
OPENAI_BASE_URL="https://d3bd34ef6463c5e82e.gradio.live/v1"
OPENAI_API_KEY="empty"
MIDSCENE_MODEL_NAME="ui-tars"
MIDSCENE_USE_VLM_UI_TARS=1