@hiiamboris
Created October 17, 2025 14:44
vast.ai nomic@ubuntu provisioning script
# generate an api key so others don't abuse this server; use it to authenticate the API calls
export MODEL_KEY="$(openssl rand -base64 24)"
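# illustrative sanity check (optional): 24 random bytes always base64-encode
# to exactly 32 characters, so any other length means the key was mangled
test "${#MODEL_KEY}" -eq 32 || echo "warning: unexpected API key length"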
# recommended by the docs
env >> /etc/environment
# update the OS
export DEBIAN_FRONTEND=noninteractive
apt update; apt upgrade -y
apt install -y nvtop
# create a server runner script
cat >~/run-server <<EOF
#!/bin/bash
echo "=== Your API key is: $MODEL_KEY ==="
docker run -d --restart unless-stopped --gpus all --name llama-server \\
  -v ~/models:/models -p 8000:8000 \\
  ghcr.io/ggml-org/llama.cpp:server-cuda \\
  -m "/models/$MODEL_NAME" $MODEL_FLAGS --port 8000 --host 0.0.0.0 \\
  --n-gpu-layers 999 --api-key "$MODEL_KEY"
EOF
chmod 700 ~/run-server
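# quick verification (optional): print the generated runner to confirm that
# the key, $MODEL_NAME and $MODEL_FLAGS were substituted as intended
cat ~/run-server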
# install FAR manager for convenient administration
wget https://github.com/spvkgn/far2l-portable/releases/download/latest/far2l-x86_64.run.tar
tar -xf far2l-x86_64.run.tar
cp "$(tar -tf far2l-x86_64.run.tar)" /usr/sbin/far2l   # the archive contains a single file
chmod 755 /usr/sbin/far2l
echo "far2l; exit" >>~/.bash_profile   # launch FAR on login, leave the shell when it exits
# download the model and the container
mkdir -p ~/models; wget -O "$HOME/models/$MODEL_NAME" "$MODEL_URL"
docker pull ghcr.io/ggml-org/llama.cpp:server-cuda
# run the server
~/run-server
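# once the model finishes loading, clients can call llama.cpp's OpenAI-compatible
# HTTP API with the printed key as a Bearer token; HOST below is a placeholder
# for this instance's public address:
#   curl http://HOST:8000/v1/chat/completions \
#     -H "Authorization: Bearer $MODEL_KEY" \
#     -H "Content-Type: application/json" \
#     -d '{"messages":[{"role":"user","content":"Hello"}]}'
#   curl http://HOST:8000/health   # liveness probe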