Skip to content

Instantly share code, notes, and snippets.

@rofinn
Last active August 14, 2024 04:25
Show Gist options
  • Save rofinn/def2ffdc396f7f7d1fe41340f7128134 to your computer and use it in GitHub Desktop.
Save rofinn/def2ffdc396f7f7d1fe41340f7128134 to your computer and use it in GitHub Desktop.
WebUI NIM RAG Demo
# Compose stack for the WebUI + NIM RAG demo: an LLM inference container
# (nim), the Open WebUI front end (webui), and a Caddy reverse proxy (caddy).
services:
  nim:
    image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
    # Directory for storing cached models (for faster restarts)
    # We'll pull this from NFS for our example
    volumes:
      - ./data/nim:/opt/nim/.cache:rw
    # Load the NGC API key (NGC_API_KEY, written to ngc.env by the setup
    # script) so that the container can download models
    env_file:
      - path: ./.config/ngc.env
        required: true
    # Specify our GPU
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    # Include a health check to wait until the container is online
    healthcheck:
      test: httpx http://localhost:8000/v1/models
      interval: 30s
      timeout: 30s
      retries: 10
      start_period: 60s

  webui:
    image: ghcr.io/open-webui/open-webui:v0.3.11-cuda
    # TODO: Move these to an .env file
    environment:
      # Point the UI's OpenAI-compatible client at the nim service
      - "OPENAI_API_BASE_URL=http://nim:8000/v1"
      - "OPENAI_API_KEY=foobar"
      # - "WEBUI_AUTH=False"
    volumes:
      - ./data/webui:/app/backend/data:rw
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    healthcheck:
      test: curl --fail http://localhost:8080
      interval: 30s
      timeout: 30s
      retries: 3
      start_period: 60s
    # Only start once the model server reports healthy
    depends_on:
      nim:
        condition: service_healthy

  caddy:
    image: caddy:latest
    # Quoted so no YAML parser can reinterpret the host:container pairs
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./.config/caddy/Caddyfile:/etc/caddy/Caddyfile
      - ./.config/caddy:/config
      - ./data/caddy:/data
    # NOTE(review): confirm this check can pass - the stock caddy image may
    # not ship curl, and the served certificate is presumably issued for the
    # nip.io hostname rather than localhost.
    healthcheck:
      test: curl --fail https://localhost:443
      interval: 30s
      timeout: 30s
      retries: 3
      start_period: 60s
    # Only start once the web UI reports healthy
    depends_on:
      webui:
        condition: service_healthy
#!/bin/bash
# Stage compose.yml in a dedicated workspace, install GNU parallel, and
# collect the NGC API key before any slow steps begin.
set -e

mkdir -p ~/workspace/webinar-demo
cp compose.yml ~/workspace/webinar-demo/compose.yml
cd ~/workspace/webinar-demo
# Remember the workspace root so later steps can return here after a cd
ROOT=$(pwd)

# curl -s https://gist.github.com/rofinn/def2ffdc396f7f7d1fe41340f7128134/raw/compose.yml > compose.yml

# apt-get (rather than apt) is the interface intended for scripted use
sudo apt-get install -y parallel

# NOTE: AFAICT all NVIDIA personal access tokens are 71 characters long.
# Reading exactly 71 characters lets the prompt return as soon as the key has
# been pasted, without waiting for Enter.
# We do this early to avoid prompting part way through some slow steps
read -r -n 71 -s -p 'Enter your NGC API Key: ' ngc_api_key
# -s suppresses terminal echo, so finish the prompt line ourselves
echo
# Pressing Enter straight away yields an empty key; stop before it is written
# to disk and used for registry login.
if [[ -z "$ngc_api_key" ]]
then
    echo 'No NGC API key entered; aborting.' >&2
    exit 1
fi
# Location of a pre-populated cache expected to contain images/ and data/
# TODO: Ask for this variable rather than hard coding it
# read -r -p 'Cache location containing images/ and data/ (press enter to skip): ' CACHE_LOC
CACHE_LOC=/home/ubuntu/personal/webinar-cache

if [[ -d "${CACHE_LOC}/data" && -d "${CACHE_LOC}/images" ]]; then
    echo "Pulling cached data/ and images/ directories from ${CACHE_LOC}"
    # First mirror the cache's directory tree (paths relative to the cache
    # root, via %P) into the current directory ...
    time sudo find "${CACHE_LOC}" -mindepth 1 -type d -printf '%P\n' \
        | sudo parallel -j 12 --bar mkdir -p "${CACHE_LOC}"/{} {}
    # ... then copy the files over, 12 at a time, preserving attributes (-p)
    # and never overwriting a file that already exists (-n).
    time sudo find "${CACHE_LOC}" -mindepth 1 -type f -printf '%P\n' \
        | sudo parallel -j 12 --bar cp -np "${CACHE_LOC}"/{} {}
else
    # No usable cache: create the empty bind-mount directories for each service
    echo "Setting up data/ directory"
    for data_dir in data/nim data/webui data/caddy; do
        echo "$data_dir"
        mkdir -p "$data_dir"
    done
fi
# Create the .config/ tree and write the NGC key files and the Caddyfile.
echo "Setting up .config/ directory"
echo ".config/nim"
mkdir -p .config/nim
echo ".config/caddy"
mkdir -p .config/caddy

echo 'Writing key to .config/ngc-api-key'
echo "$ngc_api_key" > .config/ngc-api-key
chmod 600 .config/ngc-api-key

echo 'Writing key to docker environment variable in .config/ngc.env'
# ngc.env carries the same secret as ngc-api-key, so restrict it the same way
# before the key is written into it.
touch .config/ngc.env
chmod 600 .config/ngc.env
# Remove any NGC_API_KEY line left by a previous run so re-running the script
# does not accumulate stale keys; other entries in the file are preserved.
sed -i '/^NGC_API_KEY=/d' .config/ngc.env
echo "NGC_API_KEY=$ngc_api_key" >> .config/ngc.env

# Look up this machine's public IP. -f makes curl fail on an HTTP error
# instead of returning the error page; -sS hides the progress meter but still
# shows errors.
IP=$(curl -fsS ifconfig.me)
if [[ -z "$IP" ]]
then
    echo 'Could not determine the public IP address; aborting.' >&2
    exit 1
fi

# The site address is <public-ip>.nip.io, proxied to the webui service.
# The heredoc delimiter is unquoted so that $IP is expanded.
echo "Writing .config/caddy/Caddyfile"
tee .config/caddy/Caddyfile <<EOF
$IP.nip.io {
reverse_proxy webui:8080
}
EOF
# Fetch the Denvr documentation pages into data/webui/docs unless an earlier
# run (or the cache) already provided them; index.html is used as the marker.
if [[ ! -f "data/webui/docs/index.html" ]]; then
    echo 'Downloading Denvr docs to data/webui/docs'
    mkdir -p data/webui/docs
    # Run the download in a subshell so the working directory of the main
    # script is left untouched.
    (
        cd data/webui/docs
        # Pull every docs URL out of the sitemap and hand the list to wget;
        # -E saves pages with an .html extension.
        wget -q https://docs.denvrdata.com/docs/sitemap.xml --output-document - \
            | grep -E -o "https://docs\.denvrdata\.com[^<]+" \
            | wget -q -E -i - --wait 0
    )
else
    echo "HTML docs already found. Skipping download."
fi
# Obtain the container images (from local archives when present, otherwise
# from the registries) and bring the stack up.
if [[ -f "images/nim.tar" ]]
then
    echo "Loading archived docker images"
    time sudo parallel -j 3 --bar docker load -i images/{} ::: nim.tar webui.tar caddy.tar
else
    echo 'Logging into nvcr.io'
    # docker login must run under sudo as well: the pull below runs as root,
    # so the credentials have to be stored for root rather than for the
    # invoking user. '$oauthtoken' is single-quoted so the shell passes it
    # literally instead of expanding it.
    sudo cat .config/ngc-api-key | sudo docker login nvcr.io --username '$oauthtoken' --password-stdin
    echo 'Pulling down docker images'
    time sudo docker compose pull
fi

echo 'Starting docker services'
time sudo docker compose up -d

echo "Configuration complete. Open ${IP}.nip.io in your browser."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment