llama.cpp & OpenWebUI management functions
services:
  open-webui:
    image: ghcr.io/open-webui/open-webui:dev
    container_name: open-webui
    ports:
      - ${OPEN_WEBUI_PORT}:${OPEN_WEBUI_PORT}
    volumes:
      - //f/openwebui/data:/app/backend/data
    environment:
      - ENV=dev
      - WEBUI_URL=http://${OPEN_WEBUI_HOST}:${OPEN_WEBUI_PORT}
      - PORT=${OPEN_WEBUI_PORT}
      - ENABLE_SIGNUP=False
      - ENABLE_LOGIN_FORM=False
      - DEFAULT_USER_ROLE=admin
      - ENABLE_API_KEY_ENDPOINT_RESTRICTIONS=False
      - ENABLE_OLLAMA_API=False
      - ENABLE_OPENAI_API=True
      - OPENAI_API_BASE_URL=http://${LLAMA_ARG_HOST}:${LLAMA_ARG_PORT}
      - WEBUI_AUTH=False
    extra_hosts:
      - host.docker.internal:host-gateway
  watchtower:
    image: containrrr/watchtower
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    command: --interval 300 open-webui
    depends_on:
      - open-webui
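Docker Compose substitutes the ${...} placeholders above from the shell environment (or a .env file next to the compose file); the PowerShell functions below export OPEN_WEBUI_HOST, OPEN_WEBUI_PORT, LLAMA_ARG_HOST and LLAMA_ARG_PORT before calling docker compose. A quick sanity-check sketch from an activated shell (assuming the compose file lives in $env:OPEN_WEBUI_ROOT_DIR, as the functions below do):

# Render the compose file with all ${...} variables substituted (read-only sanity check)
Push-Location $env:OPEN_WEBUI_ROOT_DIR
docker compose config
Pop-Location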
# Collection of variables, aliases and functions for working with locally hosted LLMs
# Dot-source this file, then call llm-venv-activate.
$env:LLM_VENV_ACTIVE = 0
$env:LLM_PYTHON_VENV_PATH = "$env:USERPROFILE\.llm.venv"
$env:LLAMA_ARG_HOST = "steelph0enix.pc"
$env:LLAMA_ARG_PORT = 51536
# llm python venv stuff
Function llm-venv-activate {
    if ($env:LLM_VENV_ACTIVE -eq 1) {
        Write-Host "llama.cpp virtual environment is already active!"
        return
    }
    if (-Not (Test-Path -Path $env:LLM_PYTHON_VENV_PATH)) {
        llm-venv-initialize
    }
    . "$env:LLM_PYTHON_VENV_PATH\Scripts\Activate.ps1"
    $env:ROCM_VERSION = "6.2.4"
    $env:USE_ROCM = 1
    $env:HIP_PLATFORM = "amd"
    $env:GPU_ARCHS = "gfx1100"
    $env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
    $env:GFX_ARCH = $env:GPU_ARCHS
    $env:AMDGPU_TARGETS = $env:GPU_ARCHS
    $env:PYTORCH_ROCM_ARCH = $env:GPU_ARCHS
    $env:HIP_ARCHS = $env:GPU_ARCHS
    # HIP_PATH should be set by the ROCm installer
    $env:ROCM_PATH = $env:HIP_PATH
    # llama.cpp-related variables (tweak if necessary)
    $env:LLAMA_CPP_PATH = "$env:USERPROFILE\.llama.cpp"
    $env:LLAMA_ARG_BATCH = 1024 * 8
    $env:LLAMA_ARG_UBATCH = [int]$env:LLAMA_ARG_BATCH / 8
    $env:LLAMA_ARG_N_GPU_LAYERS = 999
    $env:LLAMA_ARG_FLASH_ATTN = 1
    $env:LLAMA_ARG_MLOCK = 1
    # openwebui-related variables (tweak if necessary)
    $env:OPEN_WEBUI_ROOT_DIR = "F:/openwebui"
    $env:OPEN_WEBUI_HOST = $env:LLAMA_ARG_HOST
    $env:OPEN_WEBUI_PORT = 55569
    $env:PATH = "${env:ROCM_PATH}bin;C:\Strawberry\perl\bin;${env:PATH}"
    $env:CMAKE_MODULE_PATH = "${env:CMAKE_MODULE_PATH};${env:ROCM_PATH}cmake"
    Import-VisualStudioVars -Architecture x64
    $env:LLM_VENV_ACTIVE = 1
    Write-Host "llama.cpp python virtual environment activated!"
}
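# Sketch (not part of the original gist): llm-venv-activate assumes that HIP_PATH was
# set by the ROCm installer and that Import-VisualStudioVars is available (it is usually
# provided by the Pscx module). An optional pre-flight check you could run beforehand:
Function llm-venv-preflight {
    if (-not $env:HIP_PATH) {
        Write-Warning "HIP_PATH is not set - install the ROCm/HIP SDK first."
    }
    if (-not (Get-Command Import-VisualStudioVars -ErrorAction SilentlyContinue)) {
        Write-Warning "Import-VisualStudioVars not found - try: Install-Module Pscx"
    }
}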
Function llm-venv-update {
    llm-venv-activate
    Write-Host "Updating python virtualenv for LLMs..."
    uv pip install --upgrade pip setuptools wheel
    uv pip install --upgrade numpy sentencepiece transformers gguf protobuf torch
    Write-Host "Updated!"
}
Function llm-venv-initialize {
    Write-Host "Initializing python virtualenv for LLMs..."
    # python -m venv $env:LLM_PYTHON_VENV_PATH
    uv venv --no-project --no-config $env:LLM_PYTHON_VENV_PATH
    llm-venv-update
    Write-Host "Virtual env initialized!"
}
Function llm-venv-remove {
    Write-Host "Removing python virtualenv for LLMs..."
    rm -Force -Recurse $env:LLM_PYTHON_VENV_PATH
    Write-Host "Deleted!"
}
# llama.cpp management functions
Function llama-cpp-clone {
    llm-venv-activate
    Write-Host "Pulling llama.cpp repository to $env:LLAMA_CPP_PATH"
    git clone git@github.com:ggerganov/llama.cpp.git $env:LLAMA_CPP_PATH --recurse-submodules
    Write-Host "Pulled!"
}
Function llama-cpp-update {
    llm-venv-activate
    Write-Host "Updating llama.cpp..."
    Push-Location $env:LLAMA_CPP_PATH
    git fetch
    git clean -xddf
    git pull
    git submodule update --recursive
    git lfs pull
    llm-venv-update
    Pop-Location
    Write-Host "llama.cpp updated!"
}
Function llama-cpp-clean {
    llm-venv-activate
    Write-Host "Cleaning llama.cpp build files..."
    Push-Location $env:LLAMA_CPP_PATH
    git clean -xddf
    Pop-Location
    Write-Host "Cleaning done!"
}
Function llama-cpp-remove {
    llm-venv-activate
    Write-Host "Removing llama.cpp repository and build files..."
    rm -Force -Recurse $env:LLAMA_CPP_PATH
    Write-Host "Removed!"
}
Function llama-cpp-build-rocm {
    llm-venv-activate
    Write-Host "Building llama.cpp for ROCm..."
    Push-Location $env:LLAMA_CPP_PATH
    cmake -S . -B build -G Ninja `
        -DCMAKE_BUILD_TYPE=Release `
        -DCMAKE_CXX_COMPILER=clang++ `
        -DCMAKE_C_COMPILER=clang `
        -DCMAKE_INSTALL_PREFIX="C:/Users/phoen/AppData/Local/llama-cpp" `
        -DLLAMA_BUILD_TESTS=OFF `
        -DLLAMA_BUILD_EXAMPLES=ON `
        -DLLAMA_BUILD_SERVER=ON `
        -DLLAMA_STANDALONE=ON `
        -DLLAMA_CURL=OFF `
        -DGGML_CCACHE=ON `
        -DGGML_NATIVE=ON `
        -DGGML_OPENMP=ON `
        -DGGML_AVX=ON `
        -DGGML_AVX2=ON `
        -DGGML_FMA=ON `
        -DGGML_HIP=ON `
        -DAMDGPU_TARGETS=gfx1100 `
        -DGGML_CUDA_FA_ALL_QUANTS=ON
    cmake --build build --config Release --parallel 24
    cmake --install build --config Release
    Pop-Location
    Write-Host "llama.cpp build completed!"
}
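# Sketch (not part of the original gist): the functions below invoke llama-server and
# llama-cli by name, so the bin directory under the install prefix above needs to be on
# PATH (this script does not add it). A quick smoke test once the build is installed:
#   llama-server --version
#   llama-cli --version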
# build vulkan from msys
# openwebui management functions
function openwebui-pull {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose pull
    Pop-Location
}
function openwebui-start {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up
    Pop-Location
}
function openwebui-start-daemon {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose up -d
    Pop-Location
}
function openwebui-stop {
    llm-venv-activate
    Push-Location $env:OPEN_WEBUI_ROOT_DIR
    docker compose down
    Pop-Location
}
# llm utility functions
Function llm-quantize-model {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$false)]
        [string]$quantization = 'auto'
    )
    llm-venv-activate
    # The output file is named after the model directory: <model-dir>.<outtype>.gguf
    $output_file = "$(Split-Path -Path $model_path -Leaf).$quantization.gguf"
    Write-Host "Converting ${model_path} to ${output_file}..."
    python "$env:LLAMA_CPP_PATH/convert_hf_to_gguf.py" --outtype $quantization --outfile $output_file $model_path
}
Function llm-server {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [string]$model_name,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    llama-server `
        --model $model_path `
        --alias $model_name `
        --slots `
        --props
}
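# Sketch (not part of the original gist): llama-server reads its remaining settings
# (host, port, batch sizes, GPU layers, ...) from the LLAMA_ARG_* variables exported by
# llm-venv-activate. To check that a started server is reachable from the same shell:
#   Invoke-RestMethod "http://$($env:LLAMA_ARG_HOST):$($env:LLAMA_ARG_PORT)/health"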
Function llm-cli {
    Param(
        [Parameter(Mandatory=$true)]
        [string]$model_path,
        [Parameter(Mandatory=$true)]
        [int]$context_size_k
    )
    llm-venv-activate
    $env:LLAMA_ARG_CTX_SIZE = $context_size_k * 1024
    # LLAMA_CPP_PROMPT is expected to be set by the caller; it is not defined in this file
    llama-cli `
        --prompt $env:LLAMA_CPP_PROMPT `
        --model $model_path `
        --conversation
}
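Putting it together, a typical session might look like the sketch below. The script file name and model paths are placeholders of mine, not from the original gist; q8_0 is one of the output types convert_hf_to_gguf.py accepts. llm-server runs in the foreground, so OpenWebUI is started from a second shell.

# End-to-end usage sketch (hypothetical file name and paths)
. .\llm-functions.ps1                # dot-source this file
llm-venv-activate                    # set up the venv, ROCm and llama.cpp variables
llm-quantize-model -model_path C:\models\SomeModel -quantization q8_0
llm-server -model_path .\SomeModel.q8_0.gguf -model_name some-model -context_size_k 16
openwebui-start-daemon               # in a separate shell: bring up OpenWebUI + watchtower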