Skip to content

Instantly share code, notes, and snippets.

@madkoding
Created April 16, 2026 07:43
Show Gist options
  • Select an option

  • Save madkoding/04d7f0da321faf7932e327ee6d97b10e to your computer and use it in GitHub Desktop.

Select an option

Save madkoding/04d7f0da321faf7932e327ee6d97b10e to your computer and use it in GitHub Desktop.
#!/bin/bash
# =============================================================================
# Bonsai-1.7B Installer - One-line setup for llama.cpp + model
# Usage: curl -sL https://gist.githubusercontent.com/madkoding/c7fc0ddb806ac47b56749cc30e1a5914/raw | bash
# NOTE(review): the gist id in the Usage URL differs from the id this script
# is saved under - confirm which one is canonical.
# =============================================================================
# -e: abort on any failing command; -u: unset variables are errors;
# pipefail: a pipeline fails if ANY stage fails (without it, the
# "cmd 2>&1 | tail" pipelines below would hide git/cmake failures).
set -euo pipefail
readonly INSTALL_DIR="$HOME/llamaBonsai"
readonly MODEL_URL="https://huggingface.co/prism-ml/Bonsai-1.7B-gguf/resolve/main/Bonsai-1.7B.gguf"
readonly MODEL_NAME="Bonsai-1.7B.gguf"
readonly PORT=8080
# ANSI-C quoting ($'...') stores the real ESC byte, so the colors render
# with plain echo/printf and inside expanded here-docs, not only via echo -e.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly RESET=$'\033[0m'
# 'clear' exits non-zero without a TTY/TERM (e.g. when piped from curl),
# which would abort the script under set -e; a failed clear is harmless.
clear 2>/dev/null || true
# Print the startup banner. The here-doc delimiter must be UNQUOTED so that
# ${CYAN}, $INSTALL_DIR, etc. actually expand (quoted 'EOF' printed the
# literal "${CYAN}" text to the user); echo -e then renders any \033
# escape sequences contained in the color variables.
echo -e "$(cat <<EOF
${CYAN}
██████╗ ██████╗ ██╗██████╗ ██╗███████╗███████╗
██╔══██╗██╔══██╗██║██╔══██╗██║██╔════╝██╔════╝
██║ ██║██████╔╝██║██║ ██║██║█████╗ ███████╗
██║ ██║██╔══██╗██║██║ ██║██║██╔══╝ ╚════██║
██████╔╝██║ ██║██║██████╔╝██║███████╗███████║
╚═════╝ ╚═╝ ╚═╝╚═╝╚═════╝ ╚═╝╚══════╝╚══════╝${RESET}
${BOLD}Bonsai-1.7B GGUF Q1_0 (1-bit)${RESET}
${YELLOW}prism-ml/Bonsai-1.7B-gguf${RESET}
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
${GREEN}Installing to:${RESET} $INSTALL_DIR
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
EOF
)"
sleep 1
# Print "cuda" when a working NVIDIA driver is available, "cpu" otherwise.
# nvidia-smi is both located on PATH and actually executed, since the
# binary can exist on machines where the driver is broken or absent.
detect_cuda() {
  if ! command -v nvidia-smi > /dev/null 2>&1; then
    echo "cpu"
    return
  fi
  if nvidia-smi > /dev/null 2>&1; then
    echo "cuda"
  else
    echo "cpu"
  fi
}
# Verify that required commands are installed; exit 1 with an apt install
# hint otherwise. Generalized: the command list may be passed as arguments;
# with no arguments it defaults to the build dependencies (git cmake curl).
# Diagnostics go to stderr so they survive stdout redirection.
check_deps() {
  local deps=("$@")
  if [ ${#deps[@]} -eq 0 ]; then
    deps=(git cmake curl)
  fi
  local missing=()
  local cmd
  for cmd in "${deps[@]}"; do
    if ! command -v "$cmd" &> /dev/null; then
      missing+=("$cmd")
    fi
  done
  if [ ${#missing[@]} -ne 0 ]; then
    echo -e "${RED}Error: Missing dependencies: ${missing[*]}${RESET}" >&2
    echo "Install with: sudo apt install ${missing[*]}" >&2
    exit 1
  fi
}
mkdir -p "$INSTALL_DIR"
cd "$INSTALL_DIR"
echo -e "\n${YELLOW}[1/4]${RESET} Checking dependencies..."
check_deps
echo -e "${YELLOW}[2/4]${RESET} Setting up llama.cpp..."
if [ ! -d "llama.cpp" ]; then
  echo " Cloning PrismML fork (includes Q1_0 kernels)..."
  # 'tail' keeps the output terse, but the pipeline's exit status is tail's,
  # not git's - a failed clone would be silently ignored even under set -e.
  # Check the clone's own status via PIPESTATUS.
  git clone --depth 1 https://github.com/PrismML-Eng/llama.cpp 2>&1 | tail -2
  if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    echo -e "${RED}Error: git clone failed${RESET}" >&2
    exit 1
  fi
else
  echo " llama.cpp already present"
fi
# Configure and build llama.cpp, with CUDA when a working GPU is detected.
# The two branches only differ in the -DGGML_CUDA flag, so pick the flag
# first and run a single cd/cmake sequence instead of duplicating it.
BACKEND=$(detect_cuda)
if [ "$BACKEND" = "cuda" ]; then
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
  echo -e " ${GREEN}GPU detected:${RESET} $GPU_NAME"
  echo " Building with CUDA support..."
  CUDA_FLAG=ON
else
  echo -e " ${YELLOW}No GPU/CUDA found, building CPU version...${RESET}"
  CUDA_FLAG=OFF
fi
cd llama.cpp
# 'tail' truncates the noisy output but also masks cmake's exit status;
# verify each step explicitly via PIPESTATUS.
cmake -B build -DGGML_CUDA=$CUDA_FLAG 2>&1 | tail -3
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
  echo -e "${RED}Error: cmake configure failed${RESET}" >&2
  exit 1
fi
cmake --build build -j"$(nproc)" 2>&1 | tail -5
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
  echo -e "${RED}Error: build failed${RESET}" >&2
  exit 1
fi
echo -e "\n${YELLOW}[3/4]${RESET} Downloading model (248 MB)..."
if [ ! -f "models/$MODEL_NAME" ]; then
  mkdir -p models
  # --fail: without it curl saves the server's HTML error page as the model.
  # Download to a .part name and rename only on success, so an interrupted
  # transfer is not mistaken for a complete model on the next run.
  curl -fL "$MODEL_URL" -o "models/$MODEL_NAME.part" --progress-bar
  mv "models/$MODEL_NAME.part" "models/$MODEL_NAME"
else
  echo " Model already downloaded"
fi
echo -e "\n${YELLOW}[4/4]${RESET} Starting server..."
./build/bin/llama-server \
  -m "models/$MODEL_NAME" \
  --host 0.0.0.0 \
  --port "$PORT" \
  -ngl 99 \
  --log-disable \
  &>/dev/null &
# Remember the PID we launched: 'pkill -f llama-server' would also kill any
# unrelated llama-server instances running on this machine.
SERVER_PID=$!
trap 'echo -e "\n${RED}Stopping server...${RESET}"; kill "$SERVER_PID" 2>/dev/null; exit 0' SIGINT SIGTERM
sleep 2
# Fail loudly if the server died immediately (bad model, port in use, ...).
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
  echo -e "${RED}Error: llama-server failed to start${RESET}" >&2
  exit 1
fi
# Unquoted here-doc delimiter so ${BOLD}/$PORT expand (quoted 'EOF' printed
# the literal variable names); echo -e renders the color escape sequences.
echo -e "$(cat <<EOF
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
${GREEN}✓ Bonsai-1.7B is running!${RESET}
${BOLD}URL:${RESET} http://localhost:$PORT
${BOLD}API:${RESET} http://localhost:$PORT/v1/chat/completions
${YELLOW}Press Ctrl+C to stop${RESET}
${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}
EOF
)"
wait "$SERVER_PID"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment