llama.cpp PowerShell utils
# Collection of variables, aliases and functions for working with locally hosted LLMs.
# Dot-source this file, then call llm-venv-activate to set everything up.
$env:LLM_VENV_ACTIVE = 0
$env:LLM_PYTHON_VENV_PATH = "$env:USERPROFILE\.llm.venv"
$env:LLAMA_ARG_HOST = "steelph0enix.pc"
$env:LLAMA_ARG_PORT = 51536
# llm python venv stuff
Function llm-venv-activate {
    if ($env:LLM_VENV_ACTIVE -eq 1) {
        Write-Host "llama.cpp virtual environment is active!"
        return
    }
    if (-Not (Test-Path -Path $env:LLM_PYTHON_VENV_PATH)) {
        llm-venv-initialize
    }
    . $env:LLM_PYTHON_VENV_PATH\Scripts\Activate.ps1
    $env:ROCM_VERSION = "6.2.4"
    $env:USE_ROCM = 1
    $env:HIP_PLATFORM = "amd"
    $env:GPU_ARCHS = "gfx1100"
    $env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
    $env:GFX_ARCH = $env:GPU_ARCHS
    $env:AMDGPU_TARGETS = $env:GPU_ARCHS
    $env:PYTORCH_ROCM_ARCH = $env:GPU_ARCHS
    $env:HIP_ARCHS = $env:GPU_ARCHS
    # HIP_PATH should be set by ROCm installer
    $env:ROCM_PATH = $env:HIP_PATH
    # llama.cpp-related variables (tweak if necessary)
    $env:LLAMA_CPP_PATH = "$env:USERPROFILE\.llama.cpp"
    $env:LLAMA_ARG_BATCH = 1024 * 8
    $env:LLAMA_ARG_UBATCH = [int]$env:LLAMA_ARG_BATCH / 8
    $env:LLAMA_ARG_N_GPU_LAYERS = 999
    $env:LLAMA_ARG_FLASH_ATTN = 1
    $env:LLAMA_ARG_MLOCK = 1
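    # Note: llama.cpp tools (llama-server, llama-cli) read LLAMA_ARG_* environment
    # variables as defaults for their corresponding CLI flags, so the values above
    # apply to every server/CLI invocation made from this session.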
    # openwebui-related variables (tweak if necessary)
    $env:OPEN_WEBUI_ROOT_DIR = "F:/openwebui"
    $env:OPEN_WEBUI_HOST = $env:LLAMA_ARG_HOST
    $env:OPEN_WEBUI_PORT = 55569
    $env:PATH = "${env:ROCM_PATH}bin;${env:PATH}"
    $env:CMAKE_MODULE_PATH = "${env:CMAKE_MODULE_PATH};${env:ROCM_PATH}cmake"
    Import-VisualStudioVars -Architecture x64
    $env:LLM_VENV_ACTIVE = 1
    Write-Host "llama.cpp python virtual environment activated!"
}
Function llm-venv-update {
    llm-venv-activate
    Write-Host "Updating python virtualenv for LLMs..."
    # sentencepiece must be installed manually before running that
    uv pip install --upgrade pip setuptools wheel
    uv pip install --upgrade sentencepiece==0.2.1 numpy transformers gguf protobuf torch
    Write-Host "Updated!"
}
Function llm-venv-initialize {
    Write-Host "Initializing python virtualenv for LLMs..."
    # python -m venv $env:LLM_PYTHON_VENV_PATH
    uv venv --no-project --no-config $env:LLM_PYTHON_VENV_PATH
    # llm-venv-update
    Write-Host "Virtual env initialized!"
}
Function llm-venv-remove {
    Write-Host "Removing python virtualenv for LLMs..."
    rm -Force -Recurse $env:LLM_PYTHON_VENV_PATH
    Write-Host "Deleted!"
}
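# Example venv lifecycle (assuming this file has been dot-sourced):
#   llm-venv-activate   # creates the venv on first use, then activates it and sets env vars
#   llm-venv-update     # installs/upgrades the Python packages used by the conversion scripts
#   llm-venv-remove     # deletes the venv entirely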
# llama.cpp management functions
Function llama-cpp-clone {
    llm-venv-activate
    Write-Host "Pulling llama.cpp repository to $env:LLAMA_CPP_PATH"
    git clone git@github.com:ggerganov/llama.cpp.git $env:LLAMA_CPP_PATH --recurse-submodules
Write-Host "Pulled!" | |
} | |
Function llama-cpp-update { | |
llm-venv-activate | |
Write-Host "Updating llama.cpp..." | |
Push-Location $env:LLAMA_CPP_PATH | |
git fetch | |
git clean -xddf | |
git pull | |
git submodule update --recursive | |
git lfs pull | |
llm-venv-update | |
Pop-Location | |
Write-Host "llama.cpp updated!" | |
} | |
Function llama-cpp-clean { | |
llm-venv-activate | |
Write-Host "Performing clean build of llama.cpp..." | |
Push-Location $env:LLAMA_CPP_PATH | |
git clean -xddf | |
Pop-Location | |
Write-Host "Cleaning done!" | |
} | |
Function llama-cpp-remove { | |
llm-venv-activate | |
Write-Host "Removing llama.cpp repository and build files..." | |
rm -Force -Recurse $env:LLAMA_CPP_PATH | |
Write-Host "Removed!" | |
} | |
Function llama-cpp-build-rocm {
    llm-venv-activate
    Write-Host "Building llama.cpp for ROCm..."
    Push-Location $env:LLAMA_CPP_PATH
    cmake -S . -B build -G Ninja `
        -DCMAKE_BUILD_TYPE=Release `
        -DCMAKE_CXX_COMPILER=clang++ `
        -DCMAKE_C_COMPILER=clang `
        -DCMAKE_INSTALL_PREFIX="$env:LOCALAPPDATA/llama-cpp" `
        -DLLAMA_BUILD_TESTS=OFF `
        -DLLAMA_BUILD_EXAMPLES=ON `
        -DLLAMA_BUILD_SERVER=ON `
        -DLLAMA_STANDALONE=ON `
        -DLLAMA_CURL=OFF `
        -DGGML_CCACHE=ON `
        -DGGML_NATIVE=ON `
        -DGGML_OPENMP=ON `
        -DGGML_AVX=ON `
        -DGGML_AVX2=ON `
        -DGGML_FMA=ON `
        -DGGML_HIP=ON `
        -DAMDGPU_TARGETS=gfx1100 `
        -DGGML_CUDA_FA_ALL_QUANTS=ON
    cmake --build build --config Release --parallel 24
    cmake --install build --config Release
    Pop-Location
    Write-Host "llama.cpp build completed!"
}
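# Example build workflow (assuming a working ROCm + Visual Studio + Ninja setup):
#   llama-cpp-clone        # one-time clone into $env:LLAMA_CPP_PATH
#   llama-cpp-build-rocm   # configure, build and install the ROCm-enabled binaries
#   llama-cpp-update       # later: pull the latest sources and refresh the venv
#   llama-cpp-clean        # wipe build artifacts before a from-scratch rebuild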
# build vulkan from msys
# openwebui management functions
function openwebui-pull {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose pull
    Pop-Location
}
function openwebui-start {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up
    Pop-Location
}
function openwebui-start-daemon {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up -d
    Pop-Location
}
function openwebui-stop {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose down
    Pop-Location
}
# llm utility functions
Function llm-quantize-model {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$false)]
        [string]$quantization = 'auto'
    )
    llm-venv-activate
    $output_file = Split-Path -Path $model_path -Leaf
    Write-Host "Converting ${model_path} to ${output_file}.${quantization}.gguf..."
    python "$env:LLAMA_CPP_PATH/convert_hf_to_gguf.py" --outtype $quantization --outfile "${output_file}.${quantization}.gguf" "$model_path"
}
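# Example (hypothetical model path): convert a local Hugging Face checkout to GGUF.
# The -quantization value is passed straight to convert_hf_to_gguf.py's --outtype,
# so use a type that script accepts (e.g. f32, f16, bf16, q8_0, or the default 'auto').
#   llm-quantize-model -model_path "F:/models/SomeModel-7B" -quantization q8_0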
Function llm-server {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [string]$model_name,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    llama-server `
        --model $model_path `
        --alias $model_name `
        --slots `
        --props
}
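# Example (hypothetical model path): serve a GGUF model with a 16k-token context.
# Host, port and the remaining server options come from the LLAMA_ARG_* variables
# set in llm-venv-activate.
#   llm-server -model_path "F:/models/SomeModel-7B.q8_0.gguf" -model_name "some-model" -context_size_k 16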
Function llm-cli {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    llama-cli `
        --prompt $env:LLAMA_CPP_PROMPT `
        --model $model_path `
        --conversation
}
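# Example (hypothetical prompt and model path): interactive chat in the terminal.
# $env:LLAMA_CPP_PROMPT is not set anywhere in this file, so define it first:
#   $env:LLAMA_CPP_PROMPT = "You are a helpful assistant."
#   llm-cli -model_path "F:/models/SomeModel-7B.q8_0.gguf" -context_size_k 16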