Last active
August 14, 2024 04:25
-
-
Save rofinn/def2ffdc396f7f7d1fe41340f7128134 to your computer and use it in GitHub Desktop.
WebUI NIM RAG Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose stack for the WebUI + NIM RAG demo:
#   nim   - NVIDIA NIM container serving llama3-8b-instruct on port 8000
#   webui - Open WebUI, pointed at the NIM OpenAI-compatible endpoint
#   caddy - reverse proxy published on host ports 80 and 443
# Start-up is chained through health checks: webui waits for nim, and caddy
# waits for webui.
services:
  nim:
    image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
    # Directory for storing cached models (for faster restarts).
    # We'll pull this from NFS for our example.
    volumes:
      - ./data/nim:/opt/nim/.cache:rw
    # ngc.env supplies the NGC_API_KEY environment variable (written by the
    # setup script) so that the container can download models.
    env_file:
      - path: ./.config/ngc.env
        required: true
    # Specify our GPU
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    # Include a health check to wait until the container is online
    healthcheck:
      test: httpx http://localhost:8000/v1/models
      interval: 30s
      timeout: 30s
      retries: 10
      start_period: 60s

  webui:
    image: ghcr.io/open-webui/open-webui:v0.3.11-cuda
    # TODO: Move these to an .env file
    environment:
      # "nim" resolves to the nim service above on the compose network.
      - "OPENAI_API_BASE_URL=http://nim:8000/v1"
      - "OPENAI_API_KEY=foobar"
      # - "WEBUI_AUTH=False"
    volumes:
      - ./data/webui:/app/backend/data:rw
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
              driver: nvidia
              count: 1
    healthcheck:
      test: curl --fail http://localhost:8080
      interval: 30s
      timeout: 30s
      retries: 3
      start_period: 60s
    # Do not start until the nim health check has passed.
    depends_on:
      nim:
        condition: service_healthy

  caddy:
    image: caddy:latest
    # Quoted so the mappings are always read as strings, never as numbers.
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./.config/caddy/Caddyfile:/etc/caddy/Caddyfile
      - ./.config/caddy:/config
      - ./data/caddy:/data
    # NOTE(review): this check assumes curl is available inside the caddy
    # image and that the served certificate is valid for "localhost"; the
    # Caddyfile generated by the setup script only declares <IP>.nip.io.
    # Confirm the check can actually pass.
    healthcheck:
      test: curl --fail https://localhost:443
      interval: 30s
      timeout: 30s
      retries: 3
      start_period: 60s
    # Do not start until the webui health check has passed.
    depends_on:
      webui:
        condition: service_healthy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Stage compose.yml into a dedicated workspace directory, install GNU parallel
# and collect the NGC API key before any slow step starts.
set -e

# compose.yml is expected in the directory the script is launched from.
mkdir -p ~/workspace/webinar-demo
cp compose.yml ~/workspace/webinar-demo/compose.yml
cd ~/workspace/webinar-demo
# Remember the workspace root so later steps can return to it.
ROOT=$(pwd)

# curl -s https://gist.github.com/rofinn/def2ffdc396f7f7d1fe41340f7128134/raw/compose.yml > compose.yml

# apt-get is the interface intended for scripts; plain `apt` warns that its
# command-line interface is not stable.
sudo apt-get install -y parallel

# NOTE: AFAICT all NVIDIA personal access tokens are 71 characters long.
# Reading exactly 71 characters lets `read` return as soon as the key has been
# pasted, without waiting for an extra Enter.
# We do this early to avoid prompting part way through some slow steps.
read -r -n 71 -s -p 'Enter your NGC API Key: ' ngc_api_key
# -s suppresses terminal echo, so nothing has moved the cursor off the prompt
# line; emit the newline ourselves so later output starts on a fresh line.
echo
# Location of an optional cache containing images/ and data/ directories.
# Defaults to the original hard-coded path; export CACHE_LOC before running
# the script to point at a different cache.
# TODO: Ask for this variable rather than hard coding it
# read -r -p 'Cache location containing images/ and data/ (press enter to skip): ' CACHE_LOC
CACHE_LOC=${CACHE_LOC:-/home/ubuntu/personal/webinar-cache}

if [[ -d "${CACHE_LOC}/data" && -d "${CACHE_LOC}/images" ]]
then
    echo "Pulling cached data/ and images/ directories from ${CACHE_LOC}"
    # Step 1: recreate the cache's directory tree in the current directory.
    # find prints each path relative to CACHE_LOC (%P) and parallel substitutes
    # it for {} in up to 12 concurrent mkdir jobs.
    time sudo find "${CACHE_LOC}" -mindepth 1 -type d -printf '%P\n' | sudo parallel -j 12 --bar mkdir -p "${CACHE_LOC}"/{} {}
    # Step 2: copy the files across. -n never overwrites a file that already
    # exists here; -p preserves mode, ownership and timestamps.
    time sudo find "${CACHE_LOC}" -mindepth 1 -type f -printf '%P\n' | sudo parallel -j 12 --bar cp -np "${CACHE_LOC}"/{} {}
else
    # No usable cache: create the empty data directories that compose.yml
    # bind-mounts into the containers.
    echo "Setting up data/ directory"
    echo "data/nim"
    mkdir -p data/nim
    echo "data/webui"
    mkdir -p data/webui
    echo "data/caddy"
    mkdir -p data/caddy
fi
# Create the .config/ tree and write the NGC key files and the Caddyfile.
echo "Setting up .config/ directory"
echo ".config/nim"
mkdir -p .config/nim
echo ".config/caddy"
mkdir -p .config/caddy

echo 'Writing key to .config/ngc-api-key'
# printf writes the key verbatim regardless of its contents.
printf '%s\n' "$ngc_api_key" > .config/ngc-api-key
chmod 600 .config/ngc-api-key

echo 'Writing key to docker environment variable in .config/ngc.env'
# Overwrite rather than append, so re-running the script does not leave stale
# duplicate NGC_API_KEY lines behind.
printf 'NGC_API_KEY=%s\n' "$ngc_api_key" > .config/ngc.env
# This file holds the same secret as ngc-api-key; restrict it the same way.
chmod 600 .config/ngc.env

# Look up this machine's public IP address.
# -f makes curl fail on an HTTP error instead of handing back the error page,
# which would otherwise end up as the hostname in the Caddyfile;
# -sS hides the progress meter but still reports errors.
IP=$(curl -fsS ifconfig.me)

echo "Writing .config/caddy/Caddyfile"
# The heredoc is unquoted so $IP is expanded; tee also prints the file.
tee .config/caddy/Caddyfile <<EOF
$IP.nip.io {
    reverse_proxy webui:8080
}
EOF
# Fetch the Denvr documentation pages into data/webui/docs, unless an
# index.html is already present there.
if [[ ! -f "data/webui/docs/index.html" ]]
then
    echo 'Downloading Denvr docs to data/webui/docs'
    mkdir -p data/webui/docs
    # Run in a subshell so the directory change does not outlive the download.
    (
        cd data/webui/docs
        # Print the sitemap to stdout, pull every docs URL out of it, and feed
        # that list to a second wget that saves each page with an adjusted
        # file extension.
        wget --quiet -O - https://docs.denvrdata.com/docs/sitemap.xml \
            | grep -Eo "https://docs\.denvrdata\.com[^<]+" \
            | wget --quiet --adjust-extension --input-file=- --wait=0
    )
else
    echo "HTML docs already found. Skipping download."
fi
# Obtain the container images, either from local archives or from the
# registries, then bring the compose stack up.
if [[ -f "images/nim.tar" ]]
then
    echo "Loading archived docker images"
    # Load the three image archives concurrently, one docker load per archive.
    time sudo parallel -j 3 --bar docker load -i images/{} ::: nim.tar webui.tar caddy.tar
else
    echo 'Logging into nvcr.io'
    # Run the login itself under sudo, like every other docker command in this
    # script, so it is performed by the same user that pulls the images.
    # Previously sudo only applied to `cat`, and the login ran as the invoking
    # user. The single quotes keep $oauthtoken literal (no shell expansion).
    sudo docker login nvcr.io --username '$oauthtoken' --password-stdin < .config/ngc-api-key
    echo 'Pulling down docker images'
    time sudo docker compose pull
fi

echo 'Starting docker services'
time sudo docker compose up -d

echo "Configuration complete. Open ${IP}.nip.io in your browser."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment