@amcintyre99
Created January 25, 2025 20:58
andrew@boat:~$ cat /etc/os-release
PRETTY_NAME="Ubuntu 24.04.1 LTS"
NAME="Ubuntu"
VERSION_ID="24.04"
VERSION="24.04.1 LTS (Noble Numbat)"
VERSION_CODENAME=noble
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=noble
LOGO=ubuntu-logo
andrew@boat:~$
andrew@boat:~$ find /usr/local/cuda* /usr/lib* -name "libnvidia*.so*"
find: ‘/usr/local/cuda*’: No such file or directory
/usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-fbc.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-egl-gbm.so.1.1.2
/usr/lib/x86_64-linux-gnu/libnvidia-encode.so
/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-cfg.so
/usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-vksc-core.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-egl-xlib.so.1.0.0
/usr/lib/x86_64-linux-gnu/libnvidia-gpucomp.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-egl-xcb.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-glsi.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-allocator.so
/usr/lib/x86_64-linux-gnu/libnvidia-api.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so
/usr/lib/x86_64-linux-gnu/libnvidia-vksc-core.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-ngx.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-pkcs11-openssl3.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-egl-gbm.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-fbc.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-encode.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so
/usr/lib/x86_64-linux-gnu/libnvidia-fbc.so
/usr/lib/x86_64-linux-gnu/libnvidia-egl-wayland.so.1.1.15
/usr/lib/x86_64-linux-gnu/libnvidia-egl-xlib.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-ngx.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-pkcs11.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-encode.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-egl-xcb.so.1.0.0
/usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so
/usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-ml.so
/usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.565.77
/usr/lib/x86_64-linux-gnu/libnvidia-egl-wayland.so.1
/usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.4
/usr/lib/x86_64-linux-gnu/libnvidia-tls.so.565.77
/usr/lib/libnvidia-gtk2.so.510.47.03
/usr/lib/libnvidia-gtk3.so.510.47.03
andrew@boat:~$
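The 565.77 driver user-space libraries are clearly present, but there is no /usr/local/cuda* tree, which suggests the CUDA toolkit itself (including nvcc) is not installed; the driver packages alone do not provide it. A quick check on an apt-based system might be:

    nvcc --version || echo "nvcc not on PATH"   # nvcc only exists once a toolkit is installed
    dpkg -l | grep -i cuda-toolkit              # lists any packaged toolkit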
andrew@boat:~$ find /usr/local/cuda* /usr/lib* -name "libcuda*.so*"
find: ‘/usr/local/cuda*’: No such file or directory
/usr/lib/x86_64-linux-gnu/libcudadebugger.so.1
/usr/lib/x86_64-linux-gnu/libcuda.so
/usr/lib/x86_64-linux-gnu/libcuda.so.1
/usr/lib/x86_64-linux-gnu/libcudadebugger.so.565.77
/usr/lib/x86_64-linux-gnu/libcuda.so.565.77
/usr/lib/ollama/libcudart.so.11.0
/usr/lib/ollama/libcudart.so.11.3.109
/usr/lib/ollama/libcudart.so.12.4.127
/usr/lib/ollama/libcudart.so.12
andrew@boat:~$
andrew@boat:~$ find /usr/local/cuda* /usr/lib* -name "libcublas*.so*"
find: ‘/usr/local/cuda*’: No such file or directory
/usr/lib/ollama/libcublas.so.12.4.5.8
/usr/lib/ollama/libcublasLt.so.11.5.1.109
/usr/lib/ollama/libcublasLt.so.12
/usr/lib/ollama/libcublas.so.11
/usr/lib/ollama/libcublasLt.so.12.4.5.8
/usr/lib/ollama/libcublas.so.12
/usr/lib/ollama/libcublas.so.11.5.1.109
/usr/lib/ollama/libcublasLt.so.11
andrew@boat:~$
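The only copies of libcudart and libcublas on this machine live under /usr/lib/ollama, Ollama's private bundle, which is not on the system linker path, so nothing else can resolve against them. A diagnostic (not part of the original session) to see what the dynamic linker actually registers might be:

    ldconfig -p | grep -E 'libcudart|libcublas'   # likely prints nothing on this box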
andrew@boat:~/dma/nllama$ ldd ./node_modules/@node-llama-cpp/linux-x64-cuda/bins/linux-x64-cuda/libggml-cuda.so
linux-vdso.so.1 (0x00007ffcfff94000)
libggml-base.so => not found
libcudart.so.12 => not found
libcublas.so.12 => not found
libcuda.so.1 => /lib/x86_64-linux-gnu/libcuda.so.1 (0x000075af14400000)
libstdc++.so.6 => /lib/x86_64-linux-gnu/libstdc++.so.6 (0x000075af14000000)
libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x000075af14317000)
libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x000075af173be000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x000075af13c00000)
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x000075af173b9000)
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x000075af173b4000)
librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x000075af14312000)
/lib64/ld-linux-x86-64.so.2 (0x000075af2caa0000)
andrew@boat:~/dma/nllama$
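ldd shows the prebuilt libggml-cuda.so needs libcudart.so.12 and libcublas.so.12 (plus its sibling libggml-base.so), and none of them resolve here, which is presumably why node-llama-cpp rejects the prebuilt CUDA binary below. Purely as a diagnostic, one could check whether Ollama's bundled copies would satisfy the two CUDA dependencies by putting that directory on the search path, though that is not a supported fix:

    LD_LIBRARY_PATH=/usr/lib/ollama ldd ./node_modules/@node-llama-cpp/linux-x64-cuda/bins/linux-x64-cuda/libggml-cuda.so | grep -E 'ggml-base|cudart|cublas'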
andrew@boat:~/dma/nllama$ npx -y node-llama-cpp chat --prompt 'Hi there!' --gpu cuda "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf"
The prebuilt binary for platform "linux" "x64" with CUDA support is not compatible with the current system, falling back to building from source
Cloning llama.cpp
✔ Cloned ggerganov/llama.cpp (local bundle)
◷ Downloading cmake
@xpack-dev-tools/cmake@3.29.9-1.1...
@xpack-dev-tools/cmake@3.29.9-1.1 => '/home/andrew/dma/nllama/node_modules/node-llama-cpp/llama/xpack/store/@xpack-dev-tools/cmake/3.29.9-1.1'
Downloading https://github.com/xpack-dev-tools/cmake-xpack/releases/download/v3.29.9-1/xpack-cmake-3.29.9-1-linux-x64.tar.gz...
Extracting 'xpack-cmake-3.29.9-1-linux-x64.tar.gz'...
3490 files => '/home/andrew/dma/nllama/node_modules/node-llama-cpp/llama/xpack/store/@xpack-dev-tools/cmake/3.29.9-1.1/.content'
'xpacks/@xpack-dev-tools/cmake' -> '/home/andrew/dma/nllama/node_modules/node-llama-cpp/llama/xpack/store/@xpack-dev-tools/cmake/3.29.9-1.1'
'xpacks/.bin/ccmake' -> '../@xpack-dev-tools/cmake/.content/bin/ccmake'
'xpacks/.bin/cmake' -> '../@xpack-dev-tools/cmake/.content/bin/cmake'
'xpacks/.bin/cpack' -> '../@xpack-dev-tools/cmake/.content/bin/cpack'
'xpacks/.bin/ctest' -> '../@xpack-dev-tools/cmake/.content/bin/ctest'
✔ Downloaded cmake
Not searching for unused variables given on the command line.
-- The C compiler identification is GNU 13.3.0
-- The CXX compiler identification is GNU 13.3.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found Git: /usr/bin/git (found version "2.43.0")
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
-- CMAKE_SYSTEM_PROCESSOR: x86_64
-- Including CPU backend
-- Found OpenMP_C: -fopenmp (found version "4.5")
-- Found OpenMP_CXX: -fopenmp (found version "4.5")
-- Found OpenMP: TRUE (found version "4.5")
-- x86 detected
-- Adding CPU backend variant ggml-cpu: -march=native
-- Could not find nvcc, please set CUDAToolkit_ROOT.
CMake Error at llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt:151 (message):
CUDA Toolkit not found
-- Configuring incomplete, errors occurred!
Not searching for unused variables given on the command line.
-- The C compiler identification is GNU 13.3.0
-- The CXX compiler identification is GNU 13.3.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found Git: /usr/bin/git (found version "2.43.0")
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
-- CMAKE_SYSTEM_PROCESSOR: x86_64
-- Including CPU backend
-- Found OpenMP_C: -fopenmp (found version "4.5")
-- Found OpenMP_CXX: -fopenmp (found version "4.5")
-- Found OpenMP: TRUE (found version "4.5")
-- x86 detected
-- Adding CPU backend variant ggml-cpu: -march=native
-- Could not find nvcc, please set CUDAToolkit_ROOT.
CMake Error at llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt:151 (message):
CUDA Toolkit not found
-- Configuring incomplete, errors occurred!
ERR! OMG Process terminated: 1
[node-llama-cpp] To resolve errors related to CUDA compilation, see the CUDA guide: https://node-llama-cpp.withcat.ai/guide/CUDA
Failed to build llama.cpp with CUDA support. Error: SpawnError: Command npm run -s cmake-js-llama -- compile --log-level warn --config Release --arch=x64 --out localBuilds/linux-x64-cuda --runtime-version=18.20.5 --parallel=14 --cmake-path /home/andrew/dma/nllama/node_modules/node-llama-cpp/llama/xpack/xpacks/.bin/cmake --CDCMAKE_CONFIGURATION_TYPES=Release --CDNLC_CURRENT_PLATFORM=linux-x64 --CDNLC_TARGET_PLATFORM=linux-x64 --CDGGML_METAL=OFF --CDGGML_CUDA=1 --CDGGML_CCACHE=OFF exited with code 1
at createError (file:///home/andrew/dma/nllama/node_modules/node-llama-cpp/dist/utils/spawnCommand.js:34:20)
at ChildProcess.<anonymous> (file:///home/andrew/dma/nllama/node_modules/node-llama-cpp/dist/utils/spawnCommand.js:47:24)
at ChildProcess.emit (node:events:517:28)
at ChildProcess._handle.onexit (node:internal/child_process:292:12)
SpawnError: Command npm run -s cmake-js-llama -- compile --log-level warn --config Release --arch=x64 --out localBuilds/linux-x64-cuda --runtime-version=18.20.5 --parallel=14 --cmake-path /home/andrew/dma/nllama/node_modules/node-llama-cpp/llama/xpack/xpacks/.bin/cmake --CDCMAKE_CONFIGURATION_TYPES=Release --CDNLC_CURRENT_PLATFORM=linux-x64 --CDNLC_TARGET_PLATFORM=linux-x64 --CDGGML_METAL=OFF --CDGGML_CUDA=1 --CDGGML_CCACHE=OFF exited with code 1
at createError (file:///home/andrew/dma/nllama/node_modules/node-llama-cpp/dist/utils/spawnCommand.js:34:20)
at ChildProcess.<anonymous> (file:///home/andrew/dma/nllama/node_modules/node-llama-cpp/dist/utils/spawnCommand.js:47:24)
at ChildProcess.emit (node:events:517:28)
at ChildProcess._handle.onexit (node:internal/child_process:292:12)
andrew@boat:~/dma/nllama$
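Both build attempts fail for the same reason the ldd output hinted at: there is no CUDA toolkit, so nvcc is missing ("Could not find nvcc, please set CUDAToolkit_ROOT"). A sketch of the fix, assuming an apt-based setup (Ubuntu's packaged toolkit can lag behind what NVIDIA's own repository ships, and the export path below is hypothetical):

    sudo apt install nvidia-cuda-toolkit   # provides nvcc from the Ubuntu archive
    # or, if a toolkit is already installed somewhere non-standard, point CMake at it:
    export CUDAToolkit_ROOT=/usr/local/cuda-12.4   # hypothetical install location
    npx -y node-llama-cpp chat --prompt 'Hi there!' --gpu cuda "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf"

The CUDA guide linked in the error output (https://node-llama-cpp.withcat.ai/guide/CUDA) is the reference the tool itself points at for these errors.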
andrew@boat:~/dma/nllama$ npx -y node-llama-cpp chat --prompt 'Hi there!' --gpu vulkan "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf"
Downloading to ~/.node-llama-cpp/models
✔ hf_mradermacher_Llama-3.2-3B-Instruct.Q4_K_M.gguf downloaded 2.02GB in 28s
File: ~/.node-llama-cpp/models/hf_mradermacher_Llama-3.2-3B-Instruct.Q4_K_M.gguf
Loading model 0.000%
✔ Model loaded
⠋ Creating context
ggml_vulkan: Device memory allocation of size 2576351232 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
llama_kv_cache_init: failed to allocate buffer for kv cache
llama_init_from_model: llama_kv_cache_init() failed for self-attention cache
ggml_vulkan: Device memory allocation of size 2161639424 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
llama_kv_cache_init: failed to allocate buffer for kv cache
llama_init_from_model: llama_kv_cache_init() failed for self-attention cache
ggml_vulkan: Device memory allocation of size 1816657920 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
llama_kv_cache_init: failed to allocate buffer for kv cache
llama_init_from_model: llama_kv_cache_init() failed for self-attention cache
⠙ Creating context
ggml_vulkan: Device memory allocation of size 706744320 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
ggml_gallocr_reserve_n: failed to allocate Vulkan0 buffer of size 706744320
ggml_vulkan: Device memory allocation of size 706744320 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
ggml_gallocr_reserve_n: failed to allocate Vulkan0 buffer of size 706744320
llama_init_from_model: failed to allocate compute buffers
⠼ Creating context
ggml_vulkan: Device memory allocation of size 596971520 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
ggml_gallocr_reserve_n: failed to allocate Vulkan0 buffer of size 596971520
ggml_vulkan: Device memory allocation of size 596971520 failed.
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfDeviceMemory
ggml_gallocr_reserve_n: failed to allocate Vulkan0 buffer of size 596971520
llama_init_from_model: failed to allocate compute buffers
✔ Context created
GPU Type: Vulkan VRAM: 4GB Name: Quadro T1000
Model Type: llama 3B Q4_K - Medium Size: 1.87GB GPU layers: 29/29 offloaded (100%) mmap: enabled BOS: <|begin_of_text|> EOS: <|eot_id|> Train context size: 131,072
Context Size: 9,381 Threads: 8
Chat Wrapper: Llama 3.2 lightweight Repeat penalty: 1.1 (apply to last 64 tokens)
> Hi there!
AI: Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat for a bit?
>
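The Vulkan run succeeds, and the earlier ErrorOutOfDeviceMemory lines are just the context size being shrunk step by step until it fits the T1000's 4 GB of VRAM (it settles at 9,381 tokens). If those retries matter, the context could presumably be capped up front; the option name below is an assumption, so the CLI help is the thing to check:

    npx -y node-llama-cpp chat --help   # lists the supported flags
    # assumed flag name, for illustration only:
    npx -y node-llama-cpp chat --gpu vulkan --contextSize 8192 --prompt 'Hi there!' "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct.Q4_K_M.gguf"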