#!/usr/bin/env bash
set -euo pipefail
###############################################################################
# Bash installer for llama.cpp that builds from a GitHub tag with selectable
# profiles (native, SYCL, Vulkan, unified). Produces a clean, optimized build
# and ships a ready-to-run llama-server wrapper, making it easy to test
# performance and tweak runtime settings.
# Tested on: Ubuntu 24.04
#
# - Always overwrites previous build/install/src/tarballs under INSTALL_ROOT,
#   preserving only:
#     /opt/llama.cpp/models
#     /opt/llama.cpp/templates
#
# - Profiles:
#     PROFILE=sycl    -> SYCL
#     PROFILE=vulkan  -> Vulkan
#     PROFILE=unified -> SYCL + Vulkan
#     PROFILE=native  -> CPU native
#
# - Active install prefix (always the latest run):
#     /opt/llama.cpp/install/current/{bin,lib,...}
###############################################################################
# ---------------- User-configurable ----------------
INSTALL_ROOT="${INSTALL_ROOT:-/opt/llama.cpp}"
LLAMA_CPP_TAG="${LLAMA_CPP_TAG:-b7836}"
PROFILE="${PROFILE:-sycl}"
ONEAPI_SETVARS="${ONEAPI_SETVARS:-/opt/intel/oneapi/setvars.sh}"
GENERATOR="${GENERATOR:-Ninja}"
BUILD_TYPE="${BUILD_TYPE:-Release}"
C_COMPILER_SYCL="${C_COMPILER_SYCL:-icx}"
CXX_COMPILER_SYCL="${CXX_COMPILER_SYCL:-icpx}"
GGML_SYCL_F16="${GGML_SYCL_F16:-ON}"
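# Example invocation (the script filename below is illustrative; every variable
# in this section can be overridden from the environment):
#   PROFILE=vulkan LLAMA_CPP_TAG=b7836 bash install-llama-cpp.sh
#   PROFILE=native BUILD_TYPE=Release  bash install-llama-cpp.sh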
# ---------------- Internal paths ----------------
TARBALL_DIR="${INSTALL_ROOT}/tarballs"
SRC_DIR="${INSTALL_ROOT}/src"
BUILD_DIR="${INSTALL_ROOT}/build/current"
PREFIX="${INSTALL_ROOT}/install/current"
MODELS_DIR="${INSTALL_ROOT}/models"
TEMPLATES_DIR="${INSTALL_ROOT}/templates"
# ---------------- Helpers ----------------
log() { printf "\n[%s] %s\n" "$(date '+%F %T')" "$*"; }
need_cmd() { command -v "$1" >/dev/null 2>&1 || { echo "Missing command: $1" >&2; exit 1; }; }
die() { echo "ERROR: $*" >&2; exit 1; }
# ---------------- Preconditions ----------------
need_cmd sudo
need_cmd curl
need_cmd tar
need_cmd cmake
need_cmd rsync
if [[ "${GENERATOR}" == "Ninja" ]]; then
need_cmd ninja
fi
log "Install root: ${INSTALL_ROOT}"
log "Requested: tag=${LLAMA_CPP_TAG}, profile=${PROFILE}"
# ---------------- Clean (single active install) ----------------
log "Cleaning previous build/install/src/tarballs (preserving: models/, templates/)"
sudo mkdir -p "${INSTALL_ROOT}"
sudo chown -R "$USER":"$USER" "${INSTALL_ROOT}"
mkdir -p "${MODELS_DIR}" "${TEMPLATES_DIR}"
rm -rf "${INSTALL_ROOT}/build" \
"${INSTALL_ROOT}/install" \
"${INSTALL_ROOT}/src" \
"${INSTALL_ROOT}/tarballs" \
"${INSTALL_ROOT}/llama.cpp-"* 2>/dev/null || true
mkdir -p "${TARBALL_DIR}" "${SRC_DIR}" "${BUILD_DIR}" "${PREFIX}"
# ---------------- System deps ----------------
log "Installing build/runtime dependencies"
sudo apt-get update
sudo apt-get install -y \
  build-essential \
  cmake \
  pkg-config \
  ninja-build \
  libssl-dev \
  libgomp1
sudo apt-get install -y \
  libopenblas-dev || true
if [[ "${PROFILE}" == "sycl" || "${PROFILE}" == "unified" ]]; then
sudo apt-get install -y \
ocl-icd-libopencl1 clinfo \
libze1 libze-dev || true
[[ -f "${ONEAPI_SETVARS}" ]] || die "oneAPI setvars not found at: ${ONEAPI_SETVARS}"
fi
if [[ "${PROFILE}" == "vulkan" || "${PROFILE}" == "unified" ]]; then
sudo apt-get install -y libvulkan-dev vulkan-tools
fi
# ---------------- Download tarball ----------------
TARBALL_PATH="${TARBALL_DIR}/llama.cpp-${LLAMA_CPP_TAG}.tar.gz"
TARBALL_URL="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${LLAMA_CPP_TAG}.tar.gz"
log "Downloading llama.cpp tag tarball: ${LLAMA_CPP_TAG}"
log "URL: ${TARBALL_URL}"
curl -L --fail -o "${TARBALL_PATH}" "${TARBALL_URL}"
# ---------------- Extract sources ----------------
log "Extracting sources into ${SRC_DIR}"
TMP_EXTRACT_DIR="${INSTALL_ROOT}/llama.cpp-${LLAMA_CPP_TAG}"
rm -rf "${TMP_EXTRACT_DIR}" 2>/dev/null || true
tar -xzf "${TARBALL_PATH}" -C "${INSTALL_ROOT}"
if [[ ! -d "${TMP_EXTRACT_DIR}" ]]; then
  FOUND_DIR="$(find "${INSTALL_ROOT}" -maxdepth 1 -type d -name "llama.cpp-*" | head -n1 || true)"
  [[ -n "${FOUND_DIR}" ]] || die "Could not find extracted source dir under ${INSTALL_ROOT}"
  TMP_EXTRACT_DIR="${FOUND_DIR}"
fi
rsync -a --delete "${TMP_EXTRACT_DIR}/" "${SRC_DIR}/"
rm -rf "${TMP_EXTRACT_DIR}" || true
# ---------------- Configure CMake flags ----------------
log "Configuring build: profile=${PROFILE}, tag=${LLAMA_CPP_TAG}"
rm -rf "${BUILD_DIR}"
mkdir -p "${BUILD_DIR}" "${PREFIX}"
COMMON_CMAKE_FLAGS=(
  "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
  "-DBUILD_SHARED_LIBS=ON"
  "-DLLAMA_BUILD_TESTS=OFF"
  "-DCMAKE_INSTALL_PREFIX=${PREFIX}"
  "-DCMAKE_INSTALL_RPATH=\$ORIGIN:\$ORIGIN/../lib"
  "-DCMAKE_BUILD_RPATH=\$ORIGIN:\$ORIGIN/../lib"
  "-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON"
)
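# Note: \$ORIGIN is escaped so CMake receives the literal token; at run time the
# dynamic linker expands it to the binary's own directory, letting the installed
# tools find their shared libraries (e.g. libllama/libggml) in ./ and ../lib.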
PROFILE_FLAGS=()
case "${PROFILE}" in
sycl)
source "${ONEAPI_SETVARS}" >/dev/null 2>&1 || true
PROFILE_FLAGS+=(
"-DGGML_NATIVE=ON"
"-DGGML_OPENMP=ON"
"-DGGML_BLAS=ON"
"-DGGML_BLAS_VENDOR=OpenBLAS"
"-DGGML_SYCL=ON"
"-DGGML_VULKAN=OFF"
"-DGGML_SYCL_F16=${GGML_SYCL_F16}"
"-DCMAKE_C_COMPILER=${C_COMPILER_SYCL}"
"-DCMAKE_CXX_COMPILER=${CXX_COMPILER_SYCL}"
)
;;
vulkan)
PROFILE_FLAGS+=(
"-DGGML_NATIVE=ON"
"-DGGML_OPENMP=ON"
"-DGGML_BLAS=ON"
"-DGGML_BLAS_VENDOR=OpenBLAS"
"-DGGML_SYCL=OFF"
"-DGGML_VULKAN=ON"
)
;;
unified)
source "${ONEAPI_SETVARS}" >/dev/null 2>&1 || true
PROFILE_FLAGS+=(
"-DGGML_NATIVE=ON"
"-DGGML_OPENMP=ON"
"-DGGML_BLAS=ON"
"-DGGML_BLAS_VENDOR=OpenBLAS"
"-DGGML_SYCL=ON"
"-DGGML_VULKAN=ON"
"-DGGML_SYCL_F16=${GGML_SYCL_F16}"
"-DCMAKE_C_COMPILER=${C_COMPILER_SYCL}"
"-DCMAKE_CXX_COMPILER=${CXX_COMPILER_SYCL}"
)
;;
native)
PROFILE_FLAGS+=(
"-DGGML_NATIVE=ON"
"-DGGML_OPENMP=ON"
"-DGGML_BLAS=ON"
"-DGGML_BLAS_VENDOR=OpenBLAS"
"-DGGML_SYCL=OFF"
"-DGGML_VULKAN=OFF"
"-DGGML_LTO=ON"
)
;;
*)
die "Unknown PROFILE='${PROFILE}'. Use: native | sycl | vulkan | unified"
;;
esac
cmake -S "${SRC_DIR}" -B "${BUILD_DIR}" -G "${GENERATOR}" \
"${COMMON_CMAKE_FLAGS[@]}" \
"${PROFILE_FLAGS[@]}"
cmake --build "${BUILD_DIR}" -j"$(nproc)"
cmake --install "${BUILD_DIR}"
# ---------------- Post-install: copy runtime .so next to binaries ----------------
log "Post-install: copying runtime .so next to binaries"
mkdir -p "${PREFIX}/bin" "${PREFIX}/lib"
find "${PREFIX}/lib" -maxdepth 1 -type f -name "*.so*" -exec cp -P {} "${PREFIX}/bin/" \; 2>/dev/null || true
find "${PREFIX}/bin" -maxdepth 1 -type f -name "*.so*" -exec chmod 0644 {} \; 2>/dev/null || true
# ---------------- Loader cache (single active install) ----------------
log "Registering ${PREFIX}/lib in ld.so.conf.d"
CONF_FILE="/etc/ld.so.conf.d/llama-cpp-current.conf"
echo "${PREFIX}/lib" | sudo tee "${CONF_FILE}" >/dev/null
sudo ldconfig
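# Optional sanity check (illustrative): confirm the loader cache picked up the new libraries:
#   ldconfig -p | grep -Ei 'llama|ggml'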
# ---------------- Convenience symlinks ----------------
log "Creating symlinks in /usr/local/bin (points to current)"
sudo ln -sf "${PREFIX}/bin/llama-cli" /usr/local/bin/llama-cli
sudo ln -sf "${PREFIX}/bin/llama-server" /usr/local/bin/llama-server
# ---------------- Create run wrapper (always for current) ----------------
RUN_WRAPPER="${PREFIX}/bin/run-llama-server.sh"
log "Writing runtime wrapper: ${RUN_WRAPPER}"
cat > "${RUN_WRAPPER}" <<EOF
#!/usr/bin/env bash
set -euo pipefail
HERE="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
export LD_LIBRARY_PATH="\${HERE}:\${HERE}/../lib:\${LD_LIBRARY_PATH:-}"
# oneAPI only when profile uses SYCL
PROFILE="${PROFILE}"
if [[ "\${PROFILE}" == "sycl" || "\${PROFILE}" == "unified" ]]; then
if [[ -f /opt/intel/oneapi/setvars.sh ]]; then
# shellcheck disable=SC1091
source /opt/intel/oneapi/setvars.sh >/dev/null 2>&1 || true
fi
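  # Runtime toggles for the SYCL backend. The values below are this installer's
  # conservative defaults; each can be overridden from the environment (see the
  # llama.cpp SYCL backend documentation for details).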
  export GGML_SYCL_DISABLE_GRAPH="\${GGML_SYCL_DISABLE_GRAPH:-1}"
  export GGML_SYCL_DISABLE_DNN="\${GGML_SYCL_DISABLE_DNN:-0}"
  export GGML_SYCL_DISABLE_OPT="\${GGML_SYCL_DISABLE_OPT:-0}"
  export GGML_SYCL_PRIORITIZE_DMMV="\${GGML_SYCL_PRIORITIZE_DMMV:-0}"
fi
# Prefer P-cores on hybrid Intel; fallback to nproc
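# On hybrid Intel CPUs, /sys/devices/cpu_core/cpus holds the P-core list as a
# range string (for example "0-11"; the exact value is machine-dependent).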
if [[ -r /sys/devices/cpu_core/cpus ]]; then
  P_CORE_LIST="\$(cat /sys/devices/cpu_core/cpus)"
  P_CORES="\$(awk -v RS=',' '{ if(\$0~/-/){split(\$0,a,"-"); n+=a[2]-a[1]+1}else{n++} } END{print n}' /sys/devices/cpu_core/cpus)"
else
  P_CORE_LIST=""
  P_CORES="\$(nproc)"
fi
export OPENBLAS_NUM_THREADS="\${OPENBLAS_NUM_THREADS:-1}"
export OMP_NUM_THREADS="\${OMP_NUM_THREADS:-\$P_CORES}"
if [[ -n "\${P_CORE_LIST}" && "\${PIN_PCORES:-1}" == "1" ]]; then
exec taskset -c "\${P_CORE_LIST}" "\${HERE}/llama-server" "\$@"
else
exec "\${HERE}/llama-server" "\$@"
fi
EOF
chmod +x "${RUN_WRAPPER}"
# ---------------- Verification ----------------
log "Verifying installation"
"${PREFIX}/bin/llama-cli" --version || true
"${PREFIX}/bin/llama-cli" --list-devices || true
log "Installed (current):"
echo " ${PREFIX}/bin/llama-cli"
echo " ${PREFIX}/bin/llama-server"
echo " ${RUN_WRAPPER}"
cat <<EOF
NEXT STEPS
----------
1) Run llama-server via wrapper:
   ${RUN_WRAPPER} --host 127.0.0.1 --port 8080 \\
     -m "${MODELS_DIR}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf" \\
     -c 8192
EOF
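# The health check below assumes llama-server's default HTTP endpoints.
cat <<EOF
2) Optional health check once the server is up:
   curl -fsS http://127.0.0.1:8080/health
EOF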