llama.cpp PowerShell utils
# Collection of variables, aliases and functions to work with locally hosted LLMs.
# Dot-source this file to activate.
$env:LLM_VENV_ACTIVE = 0
$env:LLM_PYTHON_VENV_PATH = "$env:USERPROFILE\.llm.venv"
$env:LLAMA_ARG_HOST = "steelph0enix.pc"
$env:LLAMA_ARG_PORT = 51536
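# Usage sketch (hedged): assuming this file is saved somewhere as e.g. llm-utils.ps1
# (hypothetical name/path), dot-source it once per session and then call the
# functions defined below:
#   . $env:USERPROFILE\llm-utils.ps1
#   llm-venv-activate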
# llm python venv stuff
Function llm-venv-activate {
    if ($env:LLM_VENV_ACTIVE -eq 1) {
        Write-Host "llama.cpp virtual environment is already active!"
        return
    }
    if (-Not (Test-Path -Path $env:LLM_PYTHON_VENV_PATH)) {
        llm-venv-initialize
    }
    . $env:LLM_PYTHON_VENV_PATH\Scripts\Activate.ps1
    $env:ROCM_VERSION = "6.2.4"
    $env:USE_ROCM = 1
    $env:HIP_PLATFORM = "amd"
    $env:GPU_ARCHS = "gfx1100"
    $env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
    $env:GFX_ARCH = $env:GPU_ARCHS
    $env:AMDGPU_TARGETS = $env:GPU_ARCHS
    $env:PYTORCH_ROCM_ARCH = $env:GPU_ARCHS
    $env:HIP_ARCHS = $env:GPU_ARCHS
    # HIP_PATH should be set by ROCm installer
    $env:ROCM_PATH = $env:HIP_PATH
    # llama.cpp-related variables (tweak if necessary)
    $env:LLAMA_CPP_PATH = "$env:USERPROFILE\.llama.cpp"
    $env:LLAMA_ARG_BATCH = 1024 * 8
    $env:LLAMA_ARG_UBATCH = [int]$env:LLAMA_ARG_BATCH / 8
    $env:LLAMA_ARG_N_GPU_LAYERS = 999
    $env:LLAMA_ARG_FLASH_ATTN = 1
    $env:LLAMA_ARG_MLOCK = 1
    # openwebui-related variables (tweak if necessary)
    $env:OPEN_WEBUI_ROOT_DIR = "F:/openwebui"
    $env:OPEN_WEBUI_HOST = $env:LLAMA_ARG_HOST
    $env:OPEN_WEBUI_PORT = 55569
    $env:PATH = "${env:ROCM_PATH}bin;${env:PATH}"
    $env:CMAKE_MODULE_PATH = "${env:CMAKE_MODULE_PATH};${env:ROCM_PATH}cmake"
    Import-VisualStudioVars -Architecture x64
    $env:LLM_VENV_ACTIVE = 1
    Write-Host "llama.cpp python virtual environment activated!"
}
Function llm-venv-update {
    llm-venv-activate
    Write-Host "Updating python virtualenv for LLMs..."
    # sentencepiece must be installed manually before running that
    uv pip install --upgrade pip setuptools wheel
    uv pip install --upgrade sentencepiece==0.2.1 numpy transformers gguf protobuf torch
    Write-Host "Updated!"
}
Function llm-venv-initialize {
    Write-Host "Initializing python virtualenv for LLMs..."
    # python -m venv $env:LLM_PYTHON_VENV_PATH
    uv venv --no-project --no-config $env:LLM_PYTHON_VENV_PATH
    # llm-venv-update
    Write-Host "Virtual env initialized!"
}
Function llm-venv-remove {
    Write-Host "Removing python virtualenv for LLMs..."
    rm -Force -Recurse $env:LLM_PYTHON_VENV_PATH
    Write-Host "Deleted!"
}
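# Example venv lifecycle (a sketch of how these functions compose; note that
# llm-venv-activate auto-initializes the venv on first use, so calling
# llm-venv-initialize explicitly is optional):
#   llm-venv-activate   # creates the venv if missing, activates it, sets ROCm/llama.cpp vars
#   llm-venv-update     # upgrades the python packages inside it
#   llm-venv-remove     # deletes the venv directory entirely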
# llama.cpp management functions
Function llama-cpp-clone {
    llm-venv-activate
    Write-Host "Pulling llama.cpp repository to $env:LLAMA_CPP_PATH"
    git clone git@github.com:ggerganov/llama.cpp.git $env:LLAMA_CPP_PATH --recurse-submodules
    Write-Host "Pulled!"
}
Function llama-cpp-update {
    llm-venv-activate
    Write-Host "Updating llama.cpp..."
    Push-Location $env:LLAMA_CPP_PATH
    git fetch
    git clean -xddf
    git pull
    git submodule update --recursive
    git lfs pull
    llm-venv-update
    Pop-Location
    Write-Host "llama.cpp updated!"
}
Function llama-cpp-clean {
    llm-venv-activate
    Write-Host "Cleaning llama.cpp build files..."
    Push-Location $env:LLAMA_CPP_PATH
    git clean -xddf
    Pop-Location
    Write-Host "Cleaning done!"
}
Function llama-cpp-remove {
    llm-venv-activate
    Write-Host "Removing llama.cpp repository and build files..."
    rm -Force -Recurse $env:LLAMA_CPP_PATH
    Write-Host "Removed!"
}
Function llama-cpp-build-rocm {
    llm-venv-activate
    Write-Host "Building llama.cpp for ROCm..."
    Push-Location $env:LLAMA_CPP_PATH
    cmake -S . -B build -G Ninja `
        -DCMAKE_BUILD_TYPE=Release `
        -DCMAKE_CXX_COMPILER=clang++ `
        -DCMAKE_C_COMPILER=clang `
        -DCMAKE_INSTALL_PREFIX="$env:LOCALAPPDATA/llama-cpp" `
        -DLLAMA_BUILD_TESTS=OFF `
        -DLLAMA_BUILD_EXAMPLES=ON `
        -DLLAMA_BUILD_SERVER=ON `
        -DLLAMA_STANDALONE=ON `
        -DLLAMA_CURL=OFF `
        -DGGML_CCACHE=ON `
        -DGGML_NATIVE=ON `
        -DGGML_OPENMP=ON `
        -DGGML_AVX=ON `
        -DGGML_AVX2=ON `
        -DGGML_FMA=ON `
        -DGGML_HIP=ON `
        -DAMDGPU_TARGETS=gfx1100 `
        -DGGML_CUDA_FA_ALL_QUANTS=ON
    cmake --build build --config Release --parallel 24
    cmake --install build --config Release
    Pop-Location
    Write-Host "llama.cpp build completed!"
}
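# Example build workflow (sketch; assumes ROCm, Ninja, clang and a module providing
# Import-VisualStudioVars, e.g. PSCX, are installed, since llm-venv-activate relies on them):
#   llama-cpp-clone        # one-time checkout into $env:LLAMA_CPP_PATH
#   llama-cpp-build-rocm   # configure, build and install the ROCm/HIP binaries
#   llama-cpp-update       # later: pull latest sources, then rebuild with llama-cpp-build-rocm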
# build vulkan from msys
# openwebui management functions
function openwebui-pull {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose pull
    Pop-Location
}
function openwebui-start {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up
    Pop-Location
}
function openwebui-start-daemon {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up -d
    Pop-Location
}
function openwebui-stop {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose down
    Pop-Location
}
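# Example Open WebUI session (sketch; assumes $env:OPEN_WEBUI_ROOT_DIR contains a
# docker-compose.yml and that Docker is running):
#   openwebui-pull           # fetch the latest container images
#   openwebui-start-daemon   # start in the background
#   openwebui-stop           # shut it down again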
# llm utility functions
Function llm-quantize-model {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$false)]
        [string]$quantization = 'auto'
    )
    llm-venv-activate
    $output_file = Split-Path -Path $model_path -Leaf
    Write-Host "Converting ${model_path} to ${output_file}.${quantization}.gguf..."
    python "$env:LLAMA_CPP_PATH/convert_hf_to_gguf.py" --outtype $quantization --outfile "$output_file.$quantization.gguf" "$model_path"
}
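# Example (sketch; the model directory below is a placeholder HuggingFace-style checkout):
#   llm-quantize-model -model_path "F:\models\SomeModel-7B" -quantization q8_0
# This writes SomeModel-7B.q8_0.gguf into the current directory.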
Function llm-server {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [string]$model_name,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    llama-server `
        --model $model_path `
        --alias $model_name `
        --slots `
        --props
}
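# Example (sketch; paths and names are placeholders, and llama-server must be on PATH,
# e.g. from the install prefix used by llama-cpp-build-rocm):
#   llm-server -model_path "F:\models\SomeModel-7B.q8_0.gguf" -model_name "some-model" -context_size_k 16
# The remaining settings (host, port, batch sizes, GPU offload, etc.) are picked up by
# llama-server from the LLAMA_ARG_* environment variables set in llm-venv-activate.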
Function llm-cli {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    llama-cli `
        --prompt $env:LLAMA_CPP_PROMPT `
        --model $model_path `
        --conversation
}
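# Example (sketch; assumes $env:LLAMA_CPP_PROMPT holds the desired prompt, as it is
# not set anywhere in this file):
#   $env:LLAMA_CPP_PROMPT = "You are a helpful assistant."
#   llm-cli -model_path "F:\models\SomeModel-7B.q8_0.gguf" -context_size_k 8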