Bash installer for llama.cpp that builds from a GitHub tag with selectable profiles (native, SYCL, Vulkan, unified). It produces a clean, optimized build and ships a ready-to-run llama-server wrapper, making it easy to test performance and tweak runtime settings. Tested on Ubuntu 24.04.
#!/usr/bin/env bash
set -euo pipefail
###############################################################################
# Bash installer for llama.cpp that builds from a GitHub tag with selectable
# profiles (native, SYCL, Vulkan, unified).
# Produces a clean, optimized build and ships a ready-to-run llama-server
# wrapper, making it easy to test performance and tweak runtime settings.
# Tested on: Ubuntu 24.04
#
# - Always overwrites previous build/install/src/tarballs under INSTALL_ROOT,
#   preserving only:
#       /opt/llama.cpp/models
#       /opt/llama.cpp/templates
#
# - Profiles (example invocations follow this header):
#       PROFILE=sycl    -> SYCL
#       PROFILE=vulkan  -> Vulkan
#       PROFILE=unified -> SYCL + Vulkan
#       PROFILE=native  -> CPU native
#
# - Active install prefix (always the latest run):
#       /opt/llama.cpp/install/current/{bin,lib,...}
###############################################################################
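# Example invocations (a sketch; "install-llama.sh" is a hypothetical name for
# this script -- substitute wherever you saved it):
#   PROFILE=native  bash install-llama.sh
#   PROFILE=vulkan  bash install-llama.sh
#   PROFILE=sycl    LLAMA_CPP_TAG=b7836 bash install-llama.sh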
# ---------------- User-configurable ----------------
INSTALL_ROOT="${INSTALL_ROOT:-/opt/llama.cpp}"
LLAMA_CPP_TAG="${LLAMA_CPP_TAG:-b7836}"
PROFILE="${PROFILE:-sycl}"
ONEAPI_SETVARS="${ONEAPI_SETVARS:-/opt/intel/oneapi/setvars.sh}"
GENERATOR="${GENERATOR:-Ninja}"
BUILD_TYPE="${BUILD_TYPE:-Release}"
C_COMPILER_SYCL="${C_COMPILER_SYCL:-icx}"
CXX_COMPILER_SYCL="${CXX_COMPILER_SYCL:-icpx}"
GGML_SYCL_F16="${GGML_SYCL_F16:-ON}"
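# Every setting above can be overridden from the environment. For example, to
# use CMake's "Unix Makefiles" generator instead of Ninja (a sketch of an
# untested combination, not a guarantee):
#   GENERATOR="Unix Makefiles" PROFILE=native bash install-llama.sh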
# ---------------- Internal paths ----------------
TARBALL_DIR="${INSTALL_ROOT}/tarballs"
SRC_DIR="${INSTALL_ROOT}/src"
BUILD_DIR="${INSTALL_ROOT}/build/current"
PREFIX="${INSTALL_ROOT}/install/current"
MODELS_DIR="${INSTALL_ROOT}/models"
TEMPLATES_DIR="${INSTALL_ROOT}/templates"
# ---------------- Helpers ----------------
log()      { printf "\n[%s] %s\n" "$(date '+%F %T')" "$*"; }
need_cmd() { command -v "$1" >/dev/null 2>&1 || { echo "Missing command: $1" >&2; exit 1; }; }
die()      { echo "ERROR: $*" >&2; exit 1; }
# ---------------- Preconditions ----------------
need_cmd sudo
need_cmd curl
need_cmd tar
need_cmd cmake
need_cmd rsync
if [[ "${GENERATOR}" == "Ninja" ]]; then
  need_cmd ninja
fi
log "Install root: ${INSTALL_ROOT}"
log "Requested: tag=${LLAMA_CPP_TAG}, profile=${PROFILE}"
# ---------------- Clean (single active install) ----------------
log "Cleaning previous build/install/src/tarballs (preserving: models/, templates/)"
sudo mkdir -p "${INSTALL_ROOT}"
sudo chown -R "$USER":"$USER" "${INSTALL_ROOT}"
mkdir -p "${MODELS_DIR}" "${TEMPLATES_DIR}"
rm -rf "${INSTALL_ROOT}/build" \
       "${INSTALL_ROOT}/install" \
       "${INSTALL_ROOT}/src" \
       "${INSTALL_ROOT}/tarballs" \
       "${INSTALL_ROOT}/llama.cpp-"* 2>/dev/null || true
mkdir -p "${TARBALL_DIR}" "${SRC_DIR}" "${BUILD_DIR}" "${PREFIX}"
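# After this step the tree under INSTALL_ROOT looks roughly like this
# (a sketch derived from the paths defined above):
#   /opt/llama.cpp/
#   ├── build/current/     # CMake build tree (recreated every run)
#   ├── install/current/   # active install prefix (bin/, lib/, ...)
#   ├── src/               # extracted sources for LLAMA_CPP_TAG
#   ├── tarballs/          # downloaded release tarballs
#   ├── models/            # preserved across runs
#   └── templates/         # preserved across runs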
# ---------------- System deps ----------------
log "Installing build/runtime dependencies"
sudo apt-get update
sudo apt-get install -y \
  build-essential \
  cmake \
  pkg-config \
  ninja-build \
  libssl-dev \
  libgomp1
sudo apt-get install -y \
  libopenblas-dev || true
if [[ "${PROFILE}" == "sycl" || "${PROFILE}" == "unified" ]]; then
  sudo apt-get install -y \
    ocl-icd-libopencl1 clinfo \
    libze1 libze-dev || true
  [[ -f "${ONEAPI_SETVARS}" ]] || die "oneAPI setvars not found at: ${ONEAPI_SETVARS}"
fi
if [[ "${PROFILE}" == "vulkan" || "${PROFILE}" == "unified" ]]; then
  sudo apt-get install -y libvulkan-dev vulkan-tools
fi
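# Optional sanity checks after installing the GPU userspace packages (flag
# spellings vary between versions of these tools -- treat as a sketch):
#   clinfo -l              # list OpenCL platforms/devices (SYCL path)
#   vulkaninfo --summary   # confirm a Vulkan-capable device is visible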
# ---------------- Download tarball ----------------
TARBALL_PATH="${TARBALL_DIR}/llama.cpp-${LLAMA_CPP_TAG}.tar.gz"
TARBALL_URL="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${LLAMA_CPP_TAG}.tar.gz"
log "Downloading llama.cpp tag tarball: ${LLAMA_CPP_TAG}"
log "URL: ${TARBALL_URL}"
curl -L --fail -o "${TARBALL_PATH}" "${TARBALL_URL}"
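# If you want a record of exactly what was downloaded (no official checksum is
# asserted here -- this just prints one for your own notes):
#   sha256sum "${TARBALL_PATH}"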
# ---------------- Extract sources ----------------
log "Extracting sources into ${SRC_DIR}"
TMP_EXTRACT_DIR="${INSTALL_ROOT}/llama.cpp-${LLAMA_CPP_TAG}"
rm -rf "${TMP_EXTRACT_DIR}" 2>/dev/null || true
tar -xzf "${TARBALL_PATH}" -C "${INSTALL_ROOT}"
if [[ ! -d "${TMP_EXTRACT_DIR}" ]]; then
  FOUND_DIR="$(find "${INSTALL_ROOT}" -maxdepth 1 -type d -name "llama.cpp-*" | head -n1 || true)"
  [[ -n "${FOUND_DIR}" ]] || die "Could not find extracted source dir under ${INSTALL_ROOT}"
  TMP_EXTRACT_DIR="${FOUND_DIR}"
fi
rsync -a --delete "${TMP_EXTRACT_DIR}/" "${SRC_DIR}/"
rm -rf "${TMP_EXTRACT_DIR}" || true
# ---------------- Configure CMake flags ----------------
log "Configuring build: profile=${PROFILE}, tag=${LLAMA_CPP_TAG}"
rm -rf "${BUILD_DIR}"
mkdir -p "${BUILD_DIR}" "${PREFIX}"
COMMON_CMAKE_FLAGS=(
  "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
  "-DBUILD_SHARED_LIBS=ON"
  "-DLLAMA_BUILD_TESTS=OFF"
  "-DCMAKE_INSTALL_PREFIX=${PREFIX}"
  "-DCMAKE_INSTALL_RPATH=\$ORIGIN:\$ORIGIN/../lib"
  "-DCMAKE_BUILD_RPATH=\$ORIGIN:\$ORIGIN/../lib"
  "-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON"
)
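# Note on the RPATH flags above: "$ORIGIN" is resolved by the dynamic loader
# at run time to the directory containing the binary itself, so the installed
# executables find the project's shared libraries in their own directory or in
# ../lib without LD_LIBRARY_PATH. The backslash only stops this installer's
# shell from expanding the variable before CMake sees it.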
PROFILE_FLAGS=()
case "${PROFILE}" in
  sycl)
    source "${ONEAPI_SETVARS}" >/dev/null 2>&1 || true
    PROFILE_FLAGS+=(
      "-DGGML_NATIVE=ON"
      "-DGGML_OPENMP=ON"
      "-DGGML_BLAS=ON"
      "-DGGML_BLAS_VENDOR=OpenBLAS"
      "-DGGML_SYCL=ON"
      "-DGGML_VULKAN=OFF"
      "-DGGML_SYCL_F16=${GGML_SYCL_F16}"
      "-DCMAKE_C_COMPILER=${C_COMPILER_SYCL}"
      "-DCMAKE_CXX_COMPILER=${CXX_COMPILER_SYCL}"
    )
    ;;
  vulkan)
    PROFILE_FLAGS+=(
      "-DGGML_NATIVE=ON"
      "-DGGML_OPENMP=ON"
      "-DGGML_BLAS=ON"
      "-DGGML_BLAS_VENDOR=OpenBLAS"
      "-DGGML_SYCL=OFF"
      "-DGGML_VULKAN=ON"
    )
    ;;
  unified)
    source "${ONEAPI_SETVARS}" >/dev/null 2>&1 || true
    PROFILE_FLAGS+=(
      "-DGGML_NATIVE=ON"
      "-DGGML_OPENMP=ON"
      "-DGGML_BLAS=ON"
      "-DGGML_BLAS_VENDOR=OpenBLAS"
      "-DGGML_SYCL=ON"
      "-DGGML_VULKAN=ON"
      "-DGGML_SYCL_F16=${GGML_SYCL_F16}"
      "-DCMAKE_C_COMPILER=${C_COMPILER_SYCL}"
      "-DCMAKE_CXX_COMPILER=${CXX_COMPILER_SYCL}"
    )
    ;;
  native)
    PROFILE_FLAGS+=(
      "-DGGML_NATIVE=ON"
      "-DGGML_OPENMP=ON"
      "-DGGML_BLAS=ON"
      "-DGGML_BLAS_VENDOR=OpenBLAS"
      "-DGGML_SYCL=OFF"
      "-DGGML_VULKAN=OFF"
      "-DGGML_LTO=ON"
    )
    ;;
  *)
    die "Unknown PROFILE='${PROFILE}'. Use: native | sycl | vulkan | unified"
    ;;
esac
cmake -S "${SRC_DIR}" -B "${BUILD_DIR}" -G "${GENERATOR}" \
  "${COMMON_CMAKE_FLAGS[@]}" \
  "${PROFILE_FLAGS[@]}"
cmake --build "${BUILD_DIR}" -j"$(nproc)"
cmake --install "${BUILD_DIR}"
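# A quick way to confirm which backends actually linked in (a sketch; library
# names can differ slightly between llama.cpp tags):
#   ldd "${PREFIX}/bin/llama-server" | grep -Ei 'sycl|vulkan|openblas'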
# ---------------- Post-install: copy runtime .so next to binaries ----------------
log "Post-install: copying runtime .so next to binaries"
mkdir -p "${PREFIX}/bin" "${PREFIX}/lib"
find "${PREFIX}/lib" -maxdepth 1 -type f -name "*.so*" -exec cp -P {} "${PREFIX}/bin/" \; 2>/dev/null || true
find "${PREFIX}/bin" -maxdepth 1 -type f -name "*.so*" -exec chmod 0644 {} \; 2>/dev/null || true
# ---------------- Loader cache (single active install) ----------------
log "Registering ${PREFIX}/lib in ld.so.conf.d"
CONF_FILE="/etc/ld.so.conf.d/llama-cpp-current.conf"
echo "${PREFIX}/lib" | sudo tee "${CONF_FILE}" >/dev/null
sudo ldconfig
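# To confirm the loader cache now resolves the freshly installed libraries:
#   ldconfig -p | grep -i llama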
# ---------------- Convenience symlinks ----------------
log "Creating symlinks in /usr/local/bin (pointing to current)"
sudo ln -sf "${PREFIX}/bin/llama-cli" /usr/local/bin/llama-cli
sudo ln -sf "${PREFIX}/bin/llama-server" /usr/local/bin/llama-server
# ---------------- Create run wrapper (always for current) ----------------
RUN_WRAPPER="${PREFIX}/bin/run-llama-server.sh"
log "Writing runtime wrapper: ${RUN_WRAPPER}"
cat > "${RUN_WRAPPER}" <<EOF
#!/usr/bin/env bash
set -euo pipefail
HERE="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
export LD_LIBRARY_PATH="\${HERE}:\${HERE}/../lib:\${LD_LIBRARY_PATH:-}"
# oneAPI only when the profile uses SYCL
PROFILE="${PROFILE}"
if [[ "\${PROFILE}" == "sycl" || "\${PROFILE}" == "unified" ]]; then
  if [[ -f /opt/intel/oneapi/setvars.sh ]]; then
    # shellcheck disable=SC1091
    source /opt/intel/oneapi/setvars.sh >/dev/null 2>&1 || true
  fi
  export GGML_SYCL_DISABLE_GRAPH="\${GGML_SYCL_DISABLE_GRAPH:-1}"
  export GGML_SYCL_DISABLE_DNN="\${GGML_SYCL_DISABLE_DNN:-0}"
  export GGML_SYCL_DISABLE_OPT="\${GGML_SYCL_DISABLE_OPT:-0}"
  export GGML_SYCL_PRIORITIZE_DMMV="\${GGML_SYCL_PRIORITIZE_DMMV:-0}"
fi
# Prefer P-cores on hybrid Intel; fall back to nproc
if [[ -r /sys/devices/cpu_core/cpus ]]; then
  P_CORE_LIST="\$(cat /sys/devices/cpu_core/cpus)"
  P_CORES="\$(awk -v RS=',' '{ if(\$0~/-/){split(\$0,a,"-"); n+=a[2]-a[1]+1}else{n++} } END{print n}' /sys/devices/cpu_core/cpus)"
else
  P_CORE_LIST=""
  P_CORES="\$(nproc)"
fi
export OPENBLAS_NUM_THREADS="\${OPENBLAS_NUM_THREADS:-1}"
export OMP_NUM_THREADS="\${OMP_NUM_THREADS:-\$P_CORES}"
if [[ -n "\${P_CORE_LIST}" && "\${PIN_PCORES:-1}" == "1" ]]; then
  exec taskset -c "\${P_CORE_LIST}" "\${HERE}/llama-server" "\$@"
else
  exec "\${HERE}/llama-server" "\$@"
fi
EOF
chmod +x "${RUN_WRAPPER}"
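# Runtime knobs honored by the wrapper (override per invocation as needed), e.g.:
#   PIN_PCORES=0 OMP_NUM_THREADS=8 "${RUN_WRAPPER}" --port 8080 -m <model.gguf>
# On SYCL builds the GGML_SYCL_* variables the wrapper exports can be flipped
# the same way.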
# ---------------- Verification ----------------
log "Verifying installation"
"${PREFIX}/bin/llama-cli" --version || true
"${PREFIX}/bin/llama-cli" --list-devices || true
log "Installed (current):"
echo "  ${PREFIX}/bin/llama-cli"
echo "  ${PREFIX}/bin/llama-server"
echo "  ${RUN_WRAPPER}"
cat <<EOF

NEXT STEPS
----------
1) Run llama-server via the wrapper:
   ${RUN_WRAPPER} --host 127.0.0.1 --port 8080 \\
     -m "${MODELS_DIR}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf" \\
     -c 8192
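
2) Once it is listening, poke the built-in health endpoint (endpoint paths per
   llama.cpp's server docs for recent tags -- verify against your tag):
   curl http://127.0.0.1:8080/health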
EOF