Skip to content

Instantly share code, notes, and snippets.

@evadne
Created February 11, 2025 02:58
Show Gist options
  • Save evadne/4791cefd8f6a01d51e0fb52c60a52f27 to your computer and use it in GitHub Desktop.
LLaMA.cpp Server on Incus
# Incus instance configuration: CPU-only llama.cpp inference worker.
# cloud-init builds llama.cpp from source, installs the binaries into /app,
# and runs llama-server as a systemd service pinned to one NUMA domain.
architecture: x86_64
config:
  cloud-init.user-data: |
    #cloud-config
    package_upgrade: true
    packages:
      - apt-transport-https
      - build-essential
      - ca-certificates
      - cmake
      - git
      - gnupg
      - libopenblas-dev
      - libssl-dev
      - lsb-release
      - numactl
      - libomp-dev
      - pkg-config
      - software-properties-common
      - wget
    write_files:
      - path: /setup/setup-application.sh
        owner: root:root
        permissions: '0700'
        content: |
          #!/usr/bin/env bash
          set -euxo pipefail
          # Ensure the install target exists; without this the first `cp`
          # below would create a regular file named /app instead.
          mkdir -p /app
          git clone \
            --depth=1 \
            --single-branch \
            --branch=master \
            https://github.com/ggerganov/llama.cpp \
            /run/llama.cpp
          cd /run/llama.cpp
          cmake -S . -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_CURL=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_BACKEND_DL=ON \
            -DGGML_CCACHE=OFF \
            -DGGML_BLAS=ON \
            -DGGML_BLAS_VENDOR=OpenBLAS
          cmake --build build --config Release -j $(nproc)
          # Install the dynamically loaded ggml backends next to the binaries
          # (GGML_BACKEND_DL=ON discovers *.so relative to the executable).
          find build -name "*.so" -exec cp {} /app \;
          cp build/bin/llama-cli /app
          cp build/bin/llama-server /app
          cd /app
          # The build tree is deleted, so the service must run from /app.
          rm -rf /run/llama.cpp
          # Pick up the unit file written by cloud-init before enabling it.
          systemctl daemon-reload
          systemctl enable llama-server
          systemctl start llama-server
      - path: /etc/systemd/system/llama-server.service
        owner: root:root
        permissions: '0644'
        content: |
          [Unit]
          Description=llama.cpp server
          After=network-online.target
          [Service]
          Type=exec
          Restart=always
          WorkingDirectory=/app
          User=root
          Group=root
          # Binary lives at /app/llama-server (the build tree under
          # /run/llama.cpp is removed by the setup script).
          ExecStart=numactl --cpunodebind=12-23 --interleave=12-23 -- /app/llama-server --host 0.0.0.0 --port 80 --threads-http 8 --cache-type-k q4_0 -t 48 -c 16384 --temp 0.6 -s 42 --no-kv-offload -m /media/models/unsloth/DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ2_XXS/DeepSeek-R1-UD-IQ2_XXS-00001-of-00004.gguf
          AmbientCapabilities=CAP_NET_BIND_SERVICE
          [Install]
          WantedBy=multi-user.target
    runcmd:
      - /setup/setup-application.sh
  image.architecture: amd64
  image.description: Debian bookworm amd64 (20250210_05:24)
  image.os: Debian
  image.release: bookworm
  image.serial: "20250210_05:24"
  image.type: squashfs
  image.variant: cloud
  limits.cpu: 96-191,288-383
  limits.cpu.priority: "0"
  security.privileged: "true"
  security.syscalls.intercept.mount: "true"
  security.syscalls.intercept.mount.allowed: hugetlbfs
  # volatile.* keys below are managed by Incus at runtime — do not edit.
  volatile.base_image: 1103adaa8aa5a0f10bb5fd3268bafea99e9b7662e88e739b024dad509934dd5d
  volatile.cloud-init.instance-id: e34cb050-295a-4e26-871c-47d6b4f7b298
  volatile.cpu.nodes: "6"
  volatile.eth0.host_name: veth7aa64d0d
  volatile.eth0.hwaddr: 00:16:3e:d4:91:4c
  volatile.idmap.base: "0"
  volatile.idmap.current: '[]'
  volatile.last_state.power: RUNNING
  volatile.last_state.ready: "false"
  volatile.network-primary.host_name: enp65s0f0v1
  volatile.network-primary.hwaddr: 00:16:3e:b2:36:e6
  volatile.network-primary.last_state.created: "false"
  volatile.network-primary.last_state.hwaddr: 4a:c1:1c:f5:d5:e2
  volatile.network-primary.last_state.mtu: "1500"
  volatile.network-primary.last_state.vf.hwaddr: "00:00:00:00:00:00"
  volatile.network-primary.last_state.vf.id: "1"
  volatile.network-primary.last_state.vf.parent: enp65s0f0np0
  volatile.network-primary.last_state.vf.spoofcheck: "false"
  volatile.network-primary.last_state.vf.vlan: "0"
  volatile.network-primary.name: eth1
  volatile.network-sanctuary.host_name: enp65s0f1v1
  volatile.network-sanctuary.hwaddr: 00:16:3e:0e:9a:e7
  volatile.network-sanctuary.last_state.created: "false"
  volatile.network-sanctuary.last_state.hwaddr: 16:43:4e:ea:13:50
  volatile.network-sanctuary.last_state.mtu: "1500"
  volatile.network-sanctuary.last_state.vf.hwaddr: "00:00:00:00:00:00"
  volatile.network-sanctuary.last_state.vf.id: "1"
  volatile.network-sanctuary.last_state.vf.parent: enp65s0f1np1
  volatile.network-sanctuary.last_state.vf.spoofcheck: "false"
  volatile.network-sanctuary.last_state.vf.vlan: "0"
  volatile.network-sanctuary.name: eth2
  volatile.uuid: e8cc8ac6-2b39-45d3-8c59-e8cea59d48e7
  volatile.uuid.generation: e8cc8ac6-2b39-45d3-8c59-e8cea59d48e7
devices:
  # Read-only-in-practice model volume mounted from the storage pool.
  disk-models:
    path: /media/models
    pool: hercules
    source: hypnos-models
    type: disk
  root:
    path: /
    pool: hercules
    size: 32GiB
    type: disk
ephemeral: false
profiles:
  - default
  - infrastructure-network-cpu0
stateful: false
description: LLaMA.cpp Inference Worker
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment