Created
January 14, 2025 14:08
-
-
Save multikatt/43d905434f6a8b7c15dbc38dc4f79cb2 to your computer and use it in GitHub Desktop.
ollama error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ OLLAMA_DEBUG=1 ollama serve
2025/01/14 15:05:34 routes.go:1194: INFO server config env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION:10.3.0 HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_DEBUG:true OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/home/user/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:0 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]" | |
time=2025-01-14T15:05:34.493+01:00 level=INFO source=images.go:753 msg="total blobs: 16" | |
time=2025-01-14T15:05:34.493+01:00 level=INFO source=images.go:760 msg="total unused blobs removed: 0" | |
time=2025-01-14T15:05:34.493+01:00 level=INFO source=routes.go:1245 msg="Listening on 127.0.0.1:11434 (version 0.5.1)" | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=common.go:79 msg="runners located" dir=/usr/lib/ollama/runners | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx/ollama_llama_server | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx2/ollama_llama_server | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/rocm_avx/ollama_llama_server | |
time=2025-01-14T15:05:34.493+01:00 level=INFO source=routes.go:1274 msg="Dynamic LLM libraries" runners="[cpu cpu_avx cpu_avx2 rocm_avx]" | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=routes.go:1275 msg="Override detection logic by setting OLLAMA_LLM_LIBRARY" | |
time=2025-01-14T15:05:34.493+01:00 level=DEBUG source=sched.go:105 msg="starting llm scheduler" | |
time=2025-01-14T15:05:34.493+01:00 level=INFO source=gpu.go:226 msg="looking for compatible GPUs" | |
time=2025-01-14T15:05:34.510+01:00 level=DEBUG source=gpu.go:99 msg="searching for GPU discovery libraries for NVIDIA" | |
time=2025-01-14T15:05:34.510+01:00 level=DEBUG source=gpu.go:517 msg="Searching for GPU library" name=libcuda.so* | |
time=2025-01-14T15:05:34.510+01:00 level=DEBUG source=gpu.go:543 msg="gpu library search" globs="[/usr/lib/ollama/libcuda.so* /usr/lib/ollama/libcuda.so* /home/user/docker/servarr/libcuda.so* /usr/local/cuda*/targets/*/lib/libcuda.so* /usr/lib/*-linux-gnu/nvidia/current/libcuda.so* /usr/lib/*-linux-gnu/libcuda.so* /usr/lib/wsl/lib/libcuda.so* /usr/lib/wsl/drivers/*/libcuda.so* /opt/cuda/lib*/libcuda.so* /usr/local/cuda/lib*/libcuda.so* /usr/lib*/libcuda.so* /usr/local/lib*/libcuda.so*]" | |
time=2025-01-14T15:05:34.537+01:00 level=DEBUG source=gpu.go:577 msg="discovered GPU libraries" paths=[] | |
time=2025-01-14T15:05:34.537+01:00 level=DEBUG source=gpu.go:517 msg="Searching for GPU library" name=libcudart.so* | |
time=2025-01-14T15:05:34.537+01:00 level=DEBUG source=gpu.go:543 msg="gpu library search" globs="[/usr/lib/ollama/libcudart.so* /usr/lib/ollama/libcudart.so* /home/user/docker/servarr/libcudart.so* /usr/lib/ollama/libcudart.so* /usr/lib/ollama/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/x86_64-linux-gnu/libcudart.so* /usr/lib/wsl/lib/libcudart.so* /usr/lib/wsl/drivers/*/libcudart.so* /opt/cuda/lib64/libcudart.so* /usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so* /usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/aarch64-linux-gnu/libcudart.so* /usr/local/cuda/lib*/libcudart.so* /usr/lib*/libcudart.so* /usr/local/lib*/libcudart.so*]" | |
time=2025-01-14T15:05:34.550+01:00 level=DEBUG source=gpu.go:577 msg="discovered GPU libraries" paths=[] | |
time=2025-01-14T15:05:34.550+01:00 level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/linux-drivers" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/0/properties" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:122 msg="detected CPU /sys/class/kfd/kfd/topology/nodes/0/properties" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/1/properties" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:207 msg="mapping amdgpu to drm sysfs nodes" amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties vendor=4098 device=29605 unique_id=0 | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:241 msg=matched amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties drm=/sys/class/drm/card0/device | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:319 msg="amdgpu memory" gpu=0 total="16.0 GiB" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_linux.go:320 msg="amdgpu memory" gpu=0 available="15.4 GiB" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_common.go:18 msg="evaluating potential rocm lib dir /usr/lib/ollama" | |
time=2025-01-14T15:05:34.551+01:00 level=DEBUG source=amd_common.go:18 msg="evaluating potential rocm lib dir /opt/rocm/lib" | |
time=2025-01-14T15:05:34.551+01:00 level=INFO source=amd_linux.go:391 msg="skipping rocm gfx compatibility check" HSA_OVERRIDE_GFX_VERSION=10.3.0 | |
time=2025-01-14T15:05:34.551+01:00 level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1030 driver=0.0 name=1002:73a5 total="16.0 GiB" available="15.4 GiB" | |
[GIN] 2025/01/14 - 15:05:42 | 200 | 26.76µs | 127.0.0.1 | HEAD "/" | |
[GIN] 2025/01/14 - 15:05:42 | 200 | 3.955739ms | 127.0.0.1 | POST "/api/show" | |
time=2025-01-14T15:05:42.558+01:00 level=WARN source=types.go:510 msg="invalid option provided" option=rope_frequency_base | |
time=2025-01-14T15:05:42.559+01:00 level=DEBUG source=gpu.go:406 msg="updating system memory data" before.total="23.4 GiB" before.free="18.2 GiB" before.free_swap="18.0 GiB" now.total="23.4 GiB" now.free="18.2 GiB" now.free_swap="18.0 GiB" | |
time=2025-01-14T15:05:42.559+01:00 level=DEBUG source=amd_linux.go:490 msg="updating rocm free memory" gpu=0 name=1002:73a5 before="15.4 GiB" now="15.4 GiB" | |
time=2025-01-14T15:05:42.559+01:00 level=DEBUG source=sched.go:181 msg="updating default concurrency" OLLAMA_MAX_LOADED_MODELS=0x56335661e520 gpu_count=1 | |
time=2025-01-14T15:05:42.565+01:00 level=DEBUG source=sched.go:224 msg="loading first model" model=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:42.565+01:00 level=DEBUG source=memory.go:107 msg=evaluating library=rocm gpu_count=1 available="[15.4 GiB]" | |
time=2025-01-14T15:05:42.565+01:00 level=INFO source=sched.go:714 msg="new model will fit in available VRAM in single GPU, loading" model=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac gpu=0 parallel=4 available=16511504384 required="8.7 GiB" | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=gpu.go:406 msg="updating system memory data" before.total="23.4 GiB" before.free="18.2 GiB" before.free_swap="18.0 GiB" now.total="23.4 GiB" now.free="18.2 GiB" now.free_swap="18.0 GiB" | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=amd_linux.go:490 msg="updating rocm free memory" gpu=0 name=1002:73a5 before="15.4 GiB" now="15.4 GiB" | |
time=2025-01-14T15:05:42.566+01:00 level=INFO source=server.go:104 msg="system memory" total="23.4 GiB" free="18.2 GiB" free_swap="18.0 GiB" | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=memory.go:107 msg=evaluating library=rocm gpu_count=1 available="[15.4 GiB]" | |
time=2025-01-14T15:05:42.566+01:00 level=INFO source=memory.go:356 msg="offload to rocm" layers.requested=-1 layers.model=33 layers.offload=33 layers.split="" memory.available="[15.4 GiB]" memory.gpu_overhead="0 B" memory.required.full="8.7 GiB" memory.required.partial="8.7 GiB" memory.required.kv="4.0 GiB" memory.required.allocations="[8.7 GiB]" memory.weights.total="7.4 GiB" memory.weights.repeating="7.3 GiB" memory.weights.nonrepeating="102.6 MiB" memory.graph.full="560.0 MiB" memory.graph.partial="681.0 MiB" | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx/ollama_llama_server | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx2/ollama_llama_server | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/rocm_avx/ollama_llama_server | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx/ollama_llama_server | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/cpu_avx2/ollama_llama_server | |
time=2025-01-14T15:05:42.566+01:00 level=DEBUG source=common.go:123 msg="availableServers : found" file=/usr/lib/ollama/runners/rocm_avx/ollama_llama_server | |
time=2025-01-14T15:05:42.567+01:00 level=INFO source=server.go:376 msg="starting llama server" cmd="/usr/lib/ollama/runners/rocm_avx/ollama_llama_server runner --model /home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac --ctx-size 8192 --batch-size 512 --n-gpu-layers 33 --verbose --threads 8 --parallel 4 --port 39939" | |
time=2025-01-14T15:05:42.567+01:00 level=DEBUG source=server.go:393 msg=subprocess environment="[PATH=/home/user/.pyenv/shims:/run/user/1000/fnm_multishells/3020_1736851916667/bin:/home/user/.fnm:/home/user/.local/bin:/home/user/bin:/home/user/.dotnet:/home/user/.emacs.d/bin:/home/user/.cargo/bin:/home/user/go/bin:/home/user/.nix-profile/bin:/nix/var/nix/profiles/default/bin:/run/user/1000/fnm_multishells/2154_1736851878086/bin:/home/user/.fnm:/home/user/.luarocks/bin:/usr/local/sbin:/usr/local/bin:/usr/bin:/usr/lib/jvm/default/bin:/usr/bin/site_perl:/usr/bin/vendor_perl:/usr/bin/core_perl:/opt/rocm/bin:/usr/lib/rustup/bin:/var/lib/snapd/snap/bin ROCM_PATH=/opt/rocm HSA_OVERRIDE_GFX_VERSION=10.3.0 LD_LIBRARY_PATH=/usr/lib/ollama:/usr/lib/ollama:/opt/rocm/lib:/usr/lib/ollama/runners/rocm_avx ROCR_VISIBLE_DEVICES=0]" | |
time=2025-01-14T15:05:42.567+01:00 level=INFO source=sched.go:449 msg="loaded runners" count=1 | |
time=2025-01-14T15:05:42.567+01:00 level=INFO source=server.go:555 msg="waiting for llama runner to start responding" | |
time=2025-01-14T15:05:42.567+01:00 level=INFO source=server.go:589 msg="waiting for server to become available" status="llm server error" | |
time=2025-01-14T15:05:42.596+01:00 level=INFO source=runner.go:946 msg="starting go runner" | |
time=2025-01-14T15:05:42.596+01:00 level=INFO source=runner.go:947 msg=system info="AVX = 1 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 0 | FP16_VA = 0 | RISCV_VECT = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | cgo(gcc)" threads=8 | |
time=2025-01-14T15:05:42.596+01:00 level=INFO source=.:0 msg="Server listening on 127.0.0.1:39939" | |
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac (version GGUF V2) | |
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. | |
llama_model_loader: - kv 0: general.architecture str = llama | |
llama_model_loader: - kv 1: general.name str = codellama | |
llama_model_loader: - kv 2: llama.context_length u32 = 16384 | |
llama_model_loader: - kv 3: llama.embedding_length u32 = 4096 | |
llama_model_loader: - kv 4: llama.block_count u32 = 32 | |
llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008 | |
llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128 | |
llama_model_loader: - kv 7: llama.attention.head_count u32 = 32 | |
llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32 | |
llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010 | |
llama_model_loader: - kv 10: llama.rope.freq_base f32 = 1000000.000000 | |
llama_model_loader: - kv 11: general.file_type u32 = 2 | |
llama_model_loader: - kv 12: tokenizer.ggml.model str = llama | |
llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32016] = ["<unk>", "<s>", "</s>", "<0x00>", "<... | |
llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32016] = [0.000000, 0.000000, 0.000000, 0.0000... | |
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32016] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ... | |
llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1 | |
llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2 | |
llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0 | |
llama_model_loader: - kv 19: general.quantization_version u32 = 2 | |
llama_model_loader: - type f32: 65 tensors | |
llama_model_loader: - type q4_0: 225 tensors | |
llama_model_loader: - type q6_K: 1 tensors | |
llm_load_vocab: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect | |
llm_load_vocab: special_eot_id is not in special_eog_ids - the tokenizer config may be incorrect | |
llm_load_vocab: special tokens cache size = 3 | |
llm_load_vocab: token to piece cache size = 0.1686 MB | |
llm_load_print_meta: format = GGUF V2 | |
llm_load_print_meta: arch = llama | |
llm_load_print_meta: vocab type = SPM | |
llm_load_print_meta: n_vocab = 32016 | |
llm_load_print_meta: n_merges = 0 | |
llm_load_print_meta: vocab_only = 0 | |
llm_load_print_meta: n_ctx_train = 16384 | |
llm_load_print_meta: n_embd = 4096 | |
llm_load_print_meta: n_layer = 32 | |
llm_load_print_meta: n_head = 32 | |
llm_load_print_meta: n_head_kv = 32 | |
llm_load_print_meta: n_rot = 128 | |
llm_load_print_meta: n_swa = 0 | |
llm_load_print_meta: n_embd_head_k = 128 | |
llm_load_print_meta: n_embd_head_v = 128 | |
llm_load_print_meta: n_gqa = 1 | |
llm_load_print_meta: n_embd_k_gqa = 4096 | |
llm_load_print_meta: n_embd_v_gqa = 4096 | |
llm_load_print_meta: f_norm_eps = 0.0e+00 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-05 | |
llm_load_print_meta: f_clamp_kqv = 0.0e+00 | |
llm_load_print_meta: f_max_alibi_bias = 0.0e+00 | |
llm_load_print_meta: f_logit_scale = 0.0e+00 | |
llm_load_print_meta: n_ff = 11008 | |
llm_load_print_meta: n_expert = 0 | |
llm_load_print_meta: n_expert_used = 0 | |
llm_load_print_meta: causal attn = 1 | |
llm_load_print_meta: pooling type = 0 | |
llm_load_print_meta: rope type = 0 | |
llm_load_print_meta: rope scaling = linear | |
llm_load_print_meta: freq_base_train = 1000000.0 | |
llm_load_print_meta: freq_scale_train = 1 | |
llm_load_print_meta: n_ctx_orig_yarn = 16384 | |
llm_load_print_meta: rope_finetuned = unknown | |
llm_load_print_meta: ssm_d_conv = 0 | |
llm_load_print_meta: ssm_d_inner = 0 | |
llm_load_print_meta: ssm_d_state = 0 | |
llm_load_print_meta: ssm_dt_rank = 0 | |
llm_load_print_meta: ssm_dt_b_c_rms = 0 | |
llm_load_print_meta: model type = 7B | |
llm_load_print_meta: model ftype = Q4_0 | |
llm_load_print_meta: model params = 6.74 B | |
llm_load_print_meta: model size = 3.56 GiB (4.54 BPW) | |
llm_load_print_meta: general.name = codellama | |
llm_load_print_meta: BOS token = 1 '<s>' | |
llm_load_print_meta: EOS token = 2 '</s>' | |
llm_load_print_meta: UNK token = 0 '<unk>' | |
llm_load_print_meta: LF token = 13 '<0x0A>' | |
llm_load_print_meta: PRE token = 32007 '▁<PRE>' | |
llm_load_print_meta: SUF token = 32008 '▁<SUF>' | |
llm_load_print_meta: MID token = 32009 '▁<MID>' | |
llm_load_print_meta: EOT token = 32010 '▁<EOT>' | |
llm_load_print_meta: EOG token = 2 '</s>' | |
llm_load_print_meta: EOG token = 32010 '▁<EOT>' | |
llm_load_print_meta: max token length = 48 | |
time=2025-01-14T15:05:42.819+01:00 level=INFO source=server.go:589 msg="waiting for server to become available" status="llm server loading model" | |
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no | |
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no | |
ggml_cuda_init: found 1 ROCm devices: | |
Device 0: AMD Radeon RX 6950 XT, compute capability 10.3, VMM: no | |
CUDA error: invalid argument | |
current device: 0, in function ggml_backend_cuda_get_device_memory at llama/ggml-cuda.cu:3199 | |
hipMemGetInfo(free, total) | |
llama/ggml-cuda.cu:132: CUDA error | |
ptrace: Operation not permitted. | |
No stack. | |
The program is not being run. | |
SIGABRT: abort | |
PC=0x7f119db243f4 m=3 sigcode=18446744073709551610 | |
signal arrived during cgo execution | |
goroutine 7 gp=0xc0000fa000 m=3 mp=0xc000074e08 [syscall]: | |
runtime.cgocall(0x55d665333390, 0xc000081ba0) | |
runtime/cgocall.go:167 +0x4b fp=0xc000081b78 sp=0xc000081b40 pc=0x55d6650e770b | |
github.com/ollama/ollama/llama._Cfunc_llama_load_model_from_file(0x7f107c000d50, {0x21, 0x1, 0x0, 0x0, 0x0, 0x55d665332bd0, 0xc000192000, 0x0, 0x0, ...}) | |
_cgo_gotypes.go:699 +0x50 fp=0xc000081ba0 sp=0xc000081b78 pc=0x55d665192010 | |
github.com/ollama/ollama/llama.LoadModelFromFile.func1({0x7ffecdc6e891?, 0x55d6650c2225?}, {0x21, 0x1, 0x0, 0x0, 0x0, 0x55d665332bd0, 0xc000192000, 0x0, ...}) | |
github.com/ollama/ollama/llama/llama.go:291 +0xfa fp=0xc000081c88 sp=0xc000081ba0 pc=0x55d6651949ba | |
github.com/ollama/ollama/llama.LoadModelFromFile({0x7ffecdc6e891, 0x66}, {0x21, 0x0, 0x1, 0x0, {0x0, 0x0, 0x0}, 0xc0000322d0, ...}) | |
github.com/ollama/ollama/llama/llama.go:291 +0x2b8 fp=0xc000081dc8 sp=0xc000081c88 pc=0x55d6651946f8 | |
github.com/ollama/ollama/llama/runner.(*Server).loadModel(0xc0000c61b0, {0x21, 0x0, 0x1, 0x0, {0x0, 0x0, 0x0}, 0xc0000322d0, 0x0}, ...) | |
github.com/ollama/ollama/llama/runner/runner.go:861 +0xc5 fp=0xc000081f10 sp=0xc000081dc8 pc=0x55d6653306a5 | |
github.com/ollama/ollama/llama/runner.RunnerMain.gowrap1() | |
github.com/ollama/ollama/llama/runner/runner.go:980 +0xda fp=0xc000081fe0 sp=0xc000081f10 pc=0x55d665331ffa | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc000081fe8 sp=0xc000081fe0 pc=0x55d6650f5141 | |
created by github.com/ollama/ollama/llama/runner.RunnerMain in goroutine 1 | |
github.com/ollama/ollama/llama/runner/runner.go:980 +0xd05 | |
goroutine 1 gp=0xc0000061c0 m=nil [IO wait]: | |
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?) | |
runtime/proc.go:424 +0xce fp=0xc0000377b0 sp=0xc000037790 pc=0x55d6650ed50e | |
runtime.netpollblock(0xc0001a7f80?, 0x65085d66?, 0xd6?) | |
runtime/netpoll.go:575 +0xf7 fp=0xc0000377e8 sp=0xc0000377b0 pc=0x55d6650b2277 | |
internal/poll.runtime_pollWait(0x7f108a8a2fd0, 0x72) | |
runtime/netpoll.go:351 +0x85 fp=0xc000037808 sp=0xc0000377e8 pc=0x55d6650ec805 | |
internal/poll.(*pollDesc).wait(0xc00003e180?, 0x2c?, 0x0) | |
internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc000037830 sp=0xc000037808 pc=0x55d665142647 | |
internal/poll.(*pollDesc).waitRead(...) | |
internal/poll/fd_poll_runtime.go:89 | |
internal/poll.(*FD).Accept(0xc00003e180) | |
internal/poll/fd_unix.go:620 +0x295 fp=0xc0000378d8 sp=0xc000037830 pc=0x55d665143bb5 | |
net.(*netFD).accept(0xc00003e180) | |
net/fd_unix.go:172 +0x29 fp=0xc000037990 sp=0xc0000378d8 pc=0x55d6651bc249 | |
net.(*TCPListener).accept(0xc00009e700) | |
net/tcpsock_posix.go:159 +0x1e fp=0xc0000379e0 sp=0xc000037990 pc=0x55d6651cc89e | |
net.(*TCPListener).Accept(0xc00009e700) | |
net/tcpsock.go:372 +0x30 fp=0xc000037a10 sp=0xc0000379e0 pc=0x55d6651cbbd0 | |
net/http.(*onceCloseListener).Accept(0xc0001a2000?) | |
<autogenerated>:1 +0x24 fp=0xc000037a28 sp=0xc000037a10 pc=0x55d66530a7a4 | |
net/http.(*Server).Serve(0xc0000f44b0, {0x55d6656a6038, 0xc00009e700}) | |
net/http/server.go:3330 +0x30c fp=0xc000037b58 sp=0xc000037a28 pc=0x55d6652fc4ec | |
github.com/ollama/ollama/llama/runner.RunnerMain({0xc000016130?, 0x55d6650f4d9c?, 0x0?}) | |
github.com/ollama/ollama/llama/runner/runner.go:1006 +0x1198 fp=0xc000037ef8 sp=0xc000037b58 pc=0x55d665331bb8 | |
main.main() | |
github.com/ollama/ollama/cmd/runner/cmd.go:11 +0x54 fp=0xc000037f50 sp=0xc000037ef8 pc=0x55d665332b54 | |
runtime.main() | |
runtime/proc.go:272 +0x29d fp=0xc000037fe0 sp=0xc000037f50 pc=0x55d6650b985d | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc000037fe8 sp=0xc000037fe0 pc=0x55d6650f5141 | |
goroutine 2 gp=0xc000006c40 m=nil [force gc (idle)]: | |
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?) | |
runtime/proc.go:424 +0xce fp=0xc00006efa8 sp=0xc00006ef88 pc=0x55d6650ed50e | |
runtime.goparkunlock(...) | |
runtime/proc.go:430 | |
runtime.forcegchelper() | |
runtime/proc.go:337 +0xb8 fp=0xc00006efe0 sp=0xc00006efa8 pc=0x55d6650b9b98 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc00006efe8 sp=0xc00006efe0 pc=0x55d6650f5141 | |
created by runtime.init.7 in goroutine 1 | |
runtime/proc.go:325 +0x1a | |
goroutine 3 gp=0xc000007180 m=nil [GC sweep wait]: | |
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?) | |
runtime/proc.go:424 +0xce fp=0xc00006f780 sp=0xc00006f760 pc=0x55d6650ed50e | |
runtime.goparkunlock(...) | |
runtime/proc.go:430 | |
runtime.bgsweep(0xc00009c000) | |
runtime/mgcsweep.go:277 +0x94 fp=0xc00006f7c8 sp=0xc00006f780 pc=0x55d6650a43d4 | |
runtime.gcenable.gowrap1() | |
runtime/mgc.go:204 +0x25 fp=0xc00006f7e0 sp=0xc00006f7c8 pc=0x55d665098c85 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc00006f7e8 sp=0xc00006f7e0 pc=0x55d6650f5141 | |
created by runtime.gcenable in goroutine 1 | |
runtime/mgc.go:204 +0x66 | |
goroutine 4 gp=0xc000007340 m=nil [GC scavenge wait]: | |
runtime.gopark(0xc00009c000?, 0x55d66559ef40?, 0x1?, 0x0?, 0xc000007340?) | |
runtime/proc.go:424 +0xce fp=0xc00006ff78 sp=0xc00006ff58 pc=0x55d6650ed50e | |
runtime.goparkunlock(...) | |
runtime/proc.go:430 | |
runtime.(*scavengerState).park(0x55d6658901c0) | |
runtime/mgcscavenge.go:425 +0x49 fp=0xc00006ffa8 sp=0xc00006ff78 pc=0x55d6650a1e09 | |
runtime.bgscavenge(0xc00009c000) | |
runtime/mgcscavenge.go:653 +0x3c fp=0xc00006ffc8 sp=0xc00006ffa8 pc=0x55d6650a237c | |
runtime.gcenable.gowrap2() | |
runtime/mgc.go:205 +0x25 fp=0xc00006ffe0 sp=0xc00006ffc8 pc=0x55d665098c25 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc00006ffe8 sp=0xc00006ffe0 pc=0x55d6650f5141 | |
created by runtime.gcenable in goroutine 1 | |
runtime/mgc.go:205 +0xa5 | |
goroutine 5 gp=0xc000007c00 m=nil [finalizer wait]: | |
runtime.gopark(0xc00006e648?, 0x55d66508f185?, 0xb0?, 0x1?, 0xc0000061c0?) | |
runtime/proc.go:424 +0xce fp=0xc00006e620 sp=0xc00006e600 pc=0x55d6650ed50e | |
runtime.runfinq() | |
runtime/mfinal.go:193 +0x107 fp=0xc00006e7e0 sp=0xc00006e620 pc=0x55d665097d07 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc00006e7e8 sp=0xc00006e7e0 pc=0x55d6650f5141 | |
created by runtime.createfing in goroutine 1 | |
runtime/mfinal.go:163 +0x3d | |
goroutine 6 gp=0xc000007dc0 m=nil [chan receive]: | |
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?) | |
runtime/proc.go:424 +0xce fp=0xc000070718 sp=0xc0000706f8 pc=0x55d6650ed50e | |
runtime.chanrecv(0xc00002a150, 0x0, 0x1) | |
runtime/chan.go:639 +0x41c fp=0xc000070790 sp=0xc000070718 pc=0x55d66508895c | |
runtime.chanrecv1(0x0?, 0x0?) | |
runtime/chan.go:489 +0x12 fp=0xc0000707b8 sp=0xc000070790 pc=0x55d665088532 | |
runtime.unique_runtime_registerUniqueMapCleanup.func1(...) | |
runtime/mgc.go:1781 | |
runtime.unique_runtime_registerUniqueMapCleanup.gowrap1() | |
runtime/mgc.go:1784 +0x2f fp=0xc0000707e0 sp=0xc0000707b8 pc=0x55d66509baef | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc0000707e8 sp=0xc0000707e0 pc=0x55d6650f5141 | |
created by unique.runtime_registerUniqueMapCleanup in goroutine 1 | |
runtime/mgc.go:1779 +0x96 | |
goroutine 8 gp=0xc0000fa1c0 m=nil [semacquire]: | |
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?) | |
runtime/proc.go:424 +0xce fp=0xc000080e08 sp=0xc000080de8 pc=0x55d6650ed50e | |
runtime.goparkunlock(...) | |
runtime/proc.go:430 | |
runtime.semacquire1(0xc0000c61b8, 0x0, 0x1, 0x0, 0x12) | |
runtime/sema.go:178 +0x22c fp=0xc000080e70 sp=0xc000080e08 pc=0x55d6650cc82c | |
sync.runtime_Semacquire(0x0?) | |
runtime/sema.go:71 +0x25 fp=0xc000080ea8 sp=0xc000080e70 pc=0x55d6650ee745 | |
sync.(*WaitGroup).Wait(0x0?) | |
sync/waitgroup.go:118 +0x48 fp=0xc000080ed0 sp=0xc000080ea8 pc=0x55d66510a9e8 | |
github.com/ollama/ollama/llama/runner.(*Server).run(0xc0000c61b0, {0x55d6656a6620, 0xc0000f8050}) | |
github.com/ollama/ollama/llama/runner/runner.go:315 +0x47 fp=0xc000080fb8 sp=0xc000080ed0 pc=0x55d66532cd87 | |
github.com/ollama/ollama/llama/runner.RunnerMain.gowrap2() | |
github.com/ollama/ollama/llama/runner/runner.go:985 +0x28 fp=0xc000080fe0 sp=0xc000080fb8 pc=0x55d665331ee8 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc000080fe8 sp=0xc000080fe0 pc=0x55d6650f5141 | |
created by github.com/ollama/ollama/llama/runner.RunnerMain in goroutine 1 | |
github.com/ollama/ollama/llama/runner/runner.go:985 +0xddf | |
goroutine 18 gp=0xc0001a8000 m=nil [IO wait]: | |
runtime.gopark(0x55d6651438e5?, 0xc0001a0000?, 0x10?, 0x9a?, 0xb?) | |
runtime/proc.go:424 +0xce fp=0xc0000e9918 sp=0xc0000e98f8 pc=0x55d6650ed50e | |
runtime.netpollblock(0x55d665128d38?, 0x65085d66?, 0xd6?) | |
runtime/netpoll.go:575 +0xf7 fp=0xc0000e9950 sp=0xc0000e9918 pc=0x55d6650b2277 | |
internal/poll.runtime_pollWait(0x7f108a8a2eb8, 0x72) | |
runtime/netpoll.go:351 +0x85 fp=0xc0000e9970 sp=0xc0000e9950 pc=0x55d6650ec805 | |
internal/poll.(*pollDesc).wait(0xc0001a0000?, 0xc0001ae000?, 0x0) | |
internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0000e9998 sp=0xc0000e9970 pc=0x55d665142647 | |
internal/poll.(*pollDesc).waitRead(...) | |
internal/poll/fd_poll_runtime.go:89 | |
internal/poll.(*FD).Read(0xc0001a0000, {0xc0001ae000, 0x1000, 0x1000}) | |
internal/poll/fd_unix.go:165 +0x27a fp=0xc0000e9a30 sp=0xc0000e9998 pc=0x55d66514319a | |
net.(*netFD).Read(0xc0001a0000, {0xc0001ae000?, 0xc0000e9aa0?, 0x55d665142b05?}) | |
net/fd_posix.go:55 +0x25 fp=0xc0000e9a78 sp=0xc0000e9a30 pc=0x55d6651bb165 | |
net.(*conn).Read(0xc000198008, {0xc0001ae000?, 0x0?, 0xc0001960f8?}) | |
net/net.go:189 +0x45 fp=0xc0000e9ac0 sp=0xc0000e9a78 pc=0x55d6651c4b65 | |
net.(*TCPConn).Read(0xc0001960f0?, {0xc0001ae000?, 0xc0001a0000?, 0xc0000e9af8?}) | |
<autogenerated>:1 +0x25 fp=0xc0000e9af0 sp=0xc0000e9ac0 pc=0x55d6651d1c05 | |
net/http.(*connReader).Read(0xc0001960f0, {0xc0001ae000, 0x1000, 0x1000}) | |
net/http/server.go:798 +0x14b fp=0xc0000e9b40 sp=0xc0000e9af0 pc=0x55d6652f2deb | |
bufio.(*Reader).fill(0xc000190060) | |
bufio/bufio.go:110 +0x103 fp=0xc0000e9b78 sp=0xc0000e9b40 pc=0x55d6652b1a03 | |
bufio.(*Reader).Peek(0xc000190060, 0x4) | |
bufio/bufio.go:148 +0x53 fp=0xc0000e9b98 sp=0xc0000e9b78 pc=0x55d6652b1b33 | |
net/http.(*conn).serve(0xc0001a2000, {0x55d6656a65e8, 0xc0000b50e0}) | |
net/http/server.go:2127 +0x738 fp=0xc0000e9fb8 sp=0xc0000e9b98 pc=0x55d6652f8138 | |
net/http.(*Server).Serve.gowrap3() | |
net/http/server.go:3360 +0x28 fp=0xc0000e9fe0 sp=0xc0000e9fb8 pc=0x55d6652fc8e8 | |
runtime.goexit({}) | |
runtime/asm_amd64.s:1700 +0x1 fp=0xc0000e9fe8 sp=0xc0000e9fe0 pc=0x55d6650f5141 | |
created by net/http.(*Server).Serve in goroutine 1 | |
net/http/server.go:3360 +0x485 | |
rax 0x0 | |
rbx 0xd6b1 | |
rcx 0x7f119db243f4 | |
rdx 0x6 | |
rdi 0xd6af | |
rsi 0xd6b1 | |
rbp 0x7f108a88b800 | |
rsp 0x7f108a88b7c0 | |
r8 0x0 | |
r9 0x0 | |
r10 0x8 | |
r11 0x246 | |
r12 0x7f108a8966c0 | |
r13 0x7f12135a65a4 | |
r14 0x6 | |
r15 0x7f12135a65a4 | |
rip 0x7f119db243f4 | |
rflags 0x246 | |
cs 0x33 | |
fs 0x0 | |
gs 0x0 | |
time=2025-01-14T15:05:43.530+01:00 level=DEBUG source=server.go:416 msg="llama runner terminated" error="exit status 2" | |
time=2025-01-14T15:05:43.570+01:00 level=ERROR source=sched.go:455 msg="error loading llama server" error="llama runner process has terminated: CUDA error: invalid argument\n current device: 0, in function ggml_backend_cuda_get_device_memory at llama/ggml-cuda.cu:3199\n hipMemGetInfo(free, total)\nllama/ggml-cuda.cu:132: CUDA error" | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=sched.go:458 msg="triggering expiration for failed load" model=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=sched.go:360 msg="runner expired event received" modelPath=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=sched.go:375 msg="got lock to unload" modelPath=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
[GIN] 2025/01/14 - 15:05:43 | 500 | 1.017518954s | 127.0.0.1 | POST "/api/generate" | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=gpu.go:406 msg="updating system memory data" before.total="23.4 GiB" before.free="18.2 GiB" before.free_swap="18.0 GiB" now.total="23.4 GiB" now.free="18.2 GiB" now.free_swap="18.0 GiB" | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=amd_linux.go:490 msg="updating rocm free memory" gpu=0 name=1002:73a5 before="15.4 GiB" now="15.4 GiB" | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=server.go:1078 msg="stopping llama server" | |
time=2025-01-14T15:05:43.571+01:00 level=DEBUG source=sched.go:380 msg="runner released" modelPath=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:43.822+01:00 level=DEBUG source=gpu.go:406 msg="updating system memory data" before.total="23.4 GiB" before.free="18.2 GiB" before.free_swap="18.0 GiB" now.total="23.4 GiB" now.free="18.2 GiB" now.free_swap="18.0 GiB" | |
time=2025-01-14T15:05:43.822+01:00 level=DEBUG source=amd_linux.go:490 msg="updating rocm free memory" gpu=0 name=1002:73a5 before="15.4 GiB" now="15.4 GiB" | |
time=2025-01-14T15:05:43.822+01:00 level=DEBUG source=sched.go:659 msg="gpu VRAM free memory converged after 0.25 seconds" model=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:43.822+01:00 level=DEBUG source=sched.go:384 msg="sending an unloaded event" modelPath=/home/user/.ollama/models/blobs/sha256-3a43f93b78ec50f7c4e4dc8bd1cb3fff5a900e7d574c51a6f7495e48486e0dac | |
time=2025-01-14T15:05:43.822+01:00 level=DEBUG source=sched.go:308 msg="ignoring unload event with no pending requests" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment