Skip to content

Instantly share code, notes, and snippets.

@madkoding
Created April 16, 2026 07:43
Show Gist options
  • Select an option

  • Save madkoding/04d7f0da321faf7932e327ee6d97b10e to your computer and use it in GitHub Desktop.

Select an option

Save madkoding/04d7f0da321faf7932e327ee6d97b10e to your computer and use it in GitHub Desktop.
#!/bin/bash
# =============================================================================
# Bonsai-1.7B Installer - One-line setup for llama.cpp + model
# Usage: curl -sL https://gist.githubusercontent.com/madkoding/c7fc0ddb806ac47b56749cc30e1a5914/raw | bash
# NOTE(review): the gist id in the Usage URL differs from the id this script
# is saved under - confirm which one is canonical.
# =============================================================================
# -e: abort on any failing command; -u: unset variables are errors;
# pipefail: a pipeline fails if ANY stage fails (without it, the
# "cmd 2>&1 | tail" pipelines below would hide git/cmake failures).
set -euo pipefail
readonly INSTALL_DIR="$HOME/llamaBonsai"
readonly MODEL_URL="https://huggingface.co/prism-ml/Bonsai-1.7B-gguf/resolve/main/Bonsai-1.7B.gguf"
readonly MODEL_NAME="Bonsai-1.7B.gguf"
readonly PORT=8080
# ANSI-C quoting ($'...') stores the real ESC byte, so the colors render
# with plain echo/printf and inside expanded here-docs, not only via echo -e.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly RESET=$'\033[0m'
# 'clear' exits non-zero without a TTY/TERM (e.g. when piped from curl),
# which would abort the script under set -e; a failed clear is harmless.
clear 2>/dev/null || true
# Print the startup banner. The here-doc delimiter must be UNQUOTED so that
# ${CYAN}, $INSTALL_DIR, etc. actually expand (quoted 'EOF' printed the
# literal "${CYAN}" text to the user); echo -e then renders any \033
# escape sequences contained in the color variables.
echo -e "$(cat <<EOF
${CYAN}
██████╗ ██████╗ ██╗██████╗ ██╗███████╗███████╗
██╔══██╗██╔══██╗██║██╔══██╗██║██╔════╝██╔════╝
██║ ██║██████╔╝██║██║ ██║██║█████╗ ███████╗
██║ ██║██╔══██╗██║██║ ██║██║██╔══╝ ╚════██║
██████╔╝██║ ██║██║██████╔╝██║███████╗███████║
╚═════╝ ╚═╝ ╚═╝╚═╝╚═════╝ ╚═╝╚══════╝╚══════╝${RESET}
${BOLD}Bonsai-1.7B GGUF Q1_0 (1-bit)${RESET}
${YELLOW}prism-ml/Bonsai-1.7B-gguf${RESET}
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
${GREEN}Installing to:${RESET} $INSTALL_DIR
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
EOF
)"
sleep 1
# Print "cuda" when a working NVIDIA driver is available, "cpu" otherwise.
# nvidia-smi is both located on PATH and actually executed, since the
# binary can exist on machines where the driver is broken or absent.
detect_cuda() {
  if ! command -v nvidia-smi > /dev/null 2>&1; then
    echo "cpu"
    return
  fi
  if nvidia-smi > /dev/null 2>&1; then
    echo "cuda"
  else
    echo "cpu"
  fi
}
# Verify that required commands are installed; exit 1 with an apt install
# hint otherwise. Generalized: the command list may be passed as arguments;
# with no arguments it defaults to the build dependencies (git cmake curl).
# Diagnostics go to stderr so they survive stdout redirection.
check_deps() {
  local deps=("$@")
  if [ ${#deps[@]} -eq 0 ]; then
    deps=(git cmake curl)
  fi
  local missing=()
  local cmd
  for cmd in "${deps[@]}"; do
    if ! command -v "$cmd" &> /dev/null; then
      missing+=("$cmd")
    fi
  done
  if [ ${#missing[@]} -ne 0 ]; then
    echo -e "${RED}Error: Missing dependencies: ${missing[*]}${RESET}" >&2
    echo "Install with: sudo apt install ${missing[*]}" >&2
    exit 1
  fi
}
mkdir -p "$INSTALL_DIR"
cd "$INSTALL_DIR"
echo -e "\n${YELLOW}[1/4]${RESET} Checking dependencies..."
check_deps
echo -e "${YELLOW}[2/4]${RESET} Setting up llama.cpp..."
if [ ! -d "llama.cpp" ]; then
  echo " Cloning PrismML fork (includes Q1_0 kernels)..."
  # 'tail' keeps the output terse, but the pipeline's exit status is tail's,
  # not git's - a failed clone would be silently ignored even under set -e.
  # Check the clone's own status via PIPESTATUS.
  git clone --depth 1 https://github.com/PrismML-Eng/llama.cpp 2>&1 | tail -2
  if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    echo -e "${RED}Error: git clone failed${RESET}" >&2
    exit 1
  fi
else
  echo " llama.cpp already present"
fi
# Configure and build llama.cpp, with CUDA when a working GPU is detected.
# The two branches only differ in the -DGGML_CUDA flag, so pick the flag
# first and run a single cd/cmake sequence instead of duplicating it.
BACKEND=$(detect_cuda)
if [ "$BACKEND" = "cuda" ]; then
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
  echo -e " ${GREEN}GPU detected:${RESET} $GPU_NAME"
  echo " Building with CUDA support..."
  CUDA_FLAG=ON
else
  echo -e " ${YELLOW}No GPU/CUDA found, building CPU version...${RESET}"
  CUDA_FLAG=OFF
fi
cd llama.cpp
# 'tail' truncates the noisy output but also masks cmake's exit status;
# verify each step explicitly via PIPESTATUS.
cmake -B build -DGGML_CUDA=$CUDA_FLAG 2>&1 | tail -3
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
  echo -e "${RED}Error: cmake configure failed${RESET}" >&2
  exit 1
fi
cmake --build build -j"$(nproc)" 2>&1 | tail -5
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
  echo -e "${RED}Error: build failed${RESET}" >&2
  exit 1
fi
echo -e "\n${YELLOW}[3/4]${RESET} Downloading model (248 MB)..."
if [ ! -f "models/$MODEL_NAME" ]; then
  mkdir -p models
  # --fail: without it curl saves the server's HTML error page as the model.
  # Download to a .part name and rename only on success, so an interrupted
  # transfer is not mistaken for a complete model on the next run.
  curl -fL "$MODEL_URL" -o "models/$MODEL_NAME.part" --progress-bar
  mv "models/$MODEL_NAME.part" "models/$MODEL_NAME"
else
  echo " Model already downloaded"
fi
echo -e "\n${YELLOW}[4/4]${RESET} Starting server..."
./build/bin/llama-server \
  -m "models/$MODEL_NAME" \
  --host 0.0.0.0 \
  --port "$PORT" \
  -ngl 99 \
  --log-disable \
  &>/dev/null &
# Remember the PID we launched: 'pkill -f llama-server' would also kill any
# unrelated llama-server instances running on this machine.
SERVER_PID=$!
trap 'echo -e "\n${RED}Stopping server...${RESET}"; kill "$SERVER_PID" 2>/dev/null; exit 0' SIGINT SIGTERM
sleep 2
# Fail loudly if the server died immediately (bad model, port in use, ...).
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
  echo -e "${RED}Error: llama-server failed to start${RESET}" >&2
  exit 1
fi
# Unquoted here-doc delimiter so ${BOLD}/$PORT expand (quoted 'EOF' printed
# the literal variable names); echo -e renders the color escape sequences.
echo -e "$(cat <<EOF
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
${GREEN}✓ Bonsai-1.7B is running!${RESET}
${BOLD}URL:${RESET} http://localhost:$PORT
${BOLD}API:${RESET} http://localhost:$PORT/v1/chat/completions
${YELLOW}Press Ctrl+C to stop${RESET}
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
EOF
)"
wait "$SERVER_PID"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment