@kgn
Last active January 31, 2025 18:37
#!/bin/bash
#########################################################################################
# Ollama + Chatbox + DeepSeek Installation & Configuration Script
#
# This script:
# - Deploys Ollama and ensures it's configured for remote access
# - Moves Ollama storage to /data (if needed) to optimize space
# - Installs NVIDIA drivers, CUDA, and configures GPU usage
# - Verifies CUDA and VRAM availability
# - Configures UFW to allow external access
# - Downloads and verifies the DeepSeek-r1:671b model
# - Implements a wait loop for DeepSeek to fully load
# - Starts watching logs at the end for debugging
#
# Usage:
# 1. Save and run this script on your server
# 2. Once completed, connect Chatbox to http://<your-server-ip>:11434
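#
# Example check once the script completes (a sketch; /api/tags is Ollama's model-listing
# endpoint and <your-server-ip> is a placeholder):
#   curl http://<your-server-ip>:11434/api/tags
# The JSON response should list "deepseek-r1:671b" once the model pull has finished.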
#
# DISCLAIMER:
# This script is provided "as is" without any warranties or guarantees.
# Use at your own risk. Always verify before running scripts on production systems.
#########################################################################################
# Stop on errors
set -e
echo "πŸš€ Starting Ollama + DeepSeek installation on H100..."
# Ensure /data and /data/ollama directories exist before proceeding
if [ ! -d "/data" ]; then
echo "πŸ“‚ Creating /data directory..."
sudo mkdir -p /data
fi
if [ ! -d "/data/ollama" ]; then
echo "πŸ“‚ Creating /data/ollama directory..."
sudo mkdir -p /data/ollama
fi
# Set correct ownership and permissions
echo "πŸ”§ Setting correct permissions for Ollama storage..."
sudo chown -R root:root /data/ollama
sudo chmod -R 755 /data/ollama
# Install NVIDIA Drivers and CUDA
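# Note: nvidia-driver-535 / nvidia-utils-535 come from Ubuntu's own repositories, but
# cuda-toolkit-12-2 is normally published in NVIDIA's CUDA apt repository; on a stock
# Ubuntu image you may need to add that repository before the install below will succeed.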
echo "βš™οΈ Checking for NVIDIA GPU..."
if ! command -v nvidia-smi &> /dev/null; then
echo "πŸ“₯ Installing NVIDIA Drivers & CUDA..."
sudo apt update
sudo apt install -y nvidia-driver-535 nvidia-utils-535 cuda-toolkit-12-2
else
echo "βœ… NVIDIA Drivers already installed."
fi
# Verify CUDA installation
if ! command -v nvcc &> /dev/null; then
echo "❌ CUDA not installed properly. Please check manually."
exit 1
fi
echo "βœ… CUDA installed successfully: $(nvcc --version | grep release)"
# Check VRAM availability
echo "πŸ–₯ Checking GPU VRAM availability..."
nvidia-smi --query-gpu=memory.free,memory.total --format=csv
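# Note: the deepseek-r1:671b weights are several hundred GB, far more than a single H100's
# ~80 GB of VRAM, so expect Ollama to offload most layers to system RAM unless you have
# a multi-GPU node with enough combined VRAM.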
# Install Ollama if not already installed
if ! command -v ollama &> /dev/null; then
echo "πŸ“₯ Installing Ollama..."
curl -fsSL https://ollama.com/install.sh | sh
else
echo "βœ… Ollama is already installed."
fi
# Verify Ollama binary
if [ ! -f "/usr/local/bin/ollama" ]; then
echo "❌ Ollama binary not found. Exiting."
exit 1
fi
# Move existing Ollama data if needed
OLLAMA_DIR="/usr/share/ollama/.ollama"
NEW_OLLAMA_DIR="/data/ollama"
if [ -d "$OLLAMA_DIR" ] && [ ! -L "$OLLAMA_DIR" ]; then
echo "📦 Moving existing Ollama data to $NEW_OLLAMA_DIR..."
# Copy the contents (including hidden files) so models land directly under /data/ollama,
# then remove the original directory so it can be replaced by a symlink
sudo cp -a "$OLLAMA_DIR"/. "$NEW_OLLAMA_DIR"/
sudo rm -rf "$OLLAMA_DIR"
fi
# Create a symlink for Ollama storage (-n keeps re-runs from creating a link inside the target)
echo "🔗 Setting up Ollama storage..."
sudo mkdir -p "$(dirname "$OLLAMA_DIR")"
sudo ln -sfn "$NEW_OLLAMA_DIR" "$OLLAMA_DIR"
# Set environment variables for GPU execution (CUDA_VISIBLE_DEVICES is the variable Ollama
# honors; the systemd unit below also sets it for the service itself)
echo "⚙️ Configuring GPU usage for Ollama..."
export CUDA_VISIBLE_DEVICES=0
grep -qxF 'export CUDA_VISIBLE_DEVICES=0' ~/.bashrc || echo 'export CUDA_VISIBLE_DEVICES=0' >> ~/.bashrc
# Configure systemd for Ollama
echo "βš™οΈ Configuring Ollama for remote access..."
sudo tee /etc/systemd/system/ollama.service > /dev/null <<EOF
[Unit]
Description=Ollama AI Model Service
After=network.target
[Service]
ExecStart=/usr/local/bin/ollama serve
Restart=always
User=root
Environment="OLLAMA_HOST=0.0.0.0:11434"
Environment="OLLAMA_ORIGINS=*"
Environment="CUDA_VISIBLE_DEVICES=0"
[Install]
WantedBy=multi-user.target
EOF
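# In this unit, OLLAMA_HOST=0.0.0.0:11434 makes the API listen on all interfaces (needed for
# remote Chatbox clients), OLLAMA_ORIGINS=* relaxes Ollama's origin checks for browser-based
# clients, and CUDA_VISIBLE_DEVICES=0 pins the service to the first GPU.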
# Reload systemd and enable Ollama service
echo "πŸš€ Starting and enabling Ollama..."
sudo systemctl daemon-reload
sudo systemctl enable ollama
sudo systemctl restart ollama
# Confirm Ollama is running
sleep 3
if ! pgrep -x "ollama" > /dev/null; then
echo "❌ Ollama failed to start. Check logs with: journalctl -u ollama --no-pager --lines=50"
exit 1
fi
# Configure UFW to allow external access
echo "πŸ”“ Configuring firewall to allow remote connections..."
sudo ufw allow 11434/tcp
sudo ufw reload
sudo ufw status
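# Optional hardening (a sketch, with <chatbox-client-ip> as a placeholder): instead of opening
# the port to everyone, allow only the machine running Chatbox:
#   sudo ufw allow from <chatbox-client-ip> to any port 11434 proto tcp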
# Pull DeepSeek model if not already present
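# Note: deepseek-r1:671b is a very large download (hundreds of GB); make sure /data has
# enough free space and expect the pull to take a long time.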
echo "πŸ“₯ Checking if DeepSeek-r1:671b is available..."
if ! ollama list | grep -q "deepseek-r1:671b"; then
echo "πŸ“₯ Downloading deepseek-r1:671b model..."
ollama pull deepseek-r1:671b
fi
# Confirm model is available
if ! ollama list | grep -q "deepseek-r1:671b"; then
echo "❌ Failed to download deepseek-r1:671b. Please check storage space and internet connection."
exit 1
fi
# Wait for the model to be fully loaded
echo "⏳ Waiting for DeepSeek model to be fully loaded..."
# A short prompt only returns successfully once the model is resident in memory, so use the
# command's exit status as the readiness check (loading a model this size can take a while)
until ollama run deepseek-r1:671b "Reply with OK." > /dev/null 2>&1; do
echo "⏳ Model still loading..."
sleep 10
done
echo "βœ… DeepSeek model is fully loaded and ready!"
# Verify Ollama is using GPU
echo "🖥 Verifying Ollama is using GPU..."
# `ollama ps` reports whether the loaded model is running on GPU or CPU, and nvidia-smi
# shows how much VRAM is actually in use
ollama ps
nvidia-smi --query-gpu=memory.used,memory.total --format=csv
# Log success messages
echo "βœ… Ollama is installed, running in the background, and ready for Chatbox!"
echo "βœ… DeepSeek-r1:671b model is downloaded and ready to use!"
echo "➑️ You can now connect Chatbox by pointing it to: http://<your-server-ip>:11434"
# Start watching logs for real-time debugging
echo "πŸ“œ Watching Ollama logs in real-time (Press Ctrl+C to exit)..."
journalctl -u ollama -f --no-pager