@sureshg
Forked from linux-china/index.http
Created July 20, 2023 03:55
Run a Llama-2-13B-chat RESTful server locally on your M1/M2/Intel Mac with GPU inference.
### Llama 2 chat completion
POST http://127.0.0.1:8080/completion
Content-Type: application/json

{
  "prompt": "What is Java Language?",
  "temperature": 0.7
}
### Llama 2 tokenize
POST http://127.0.0.1:8080/tokenize
Content-Type: application/json

{
  "content": "What is Java Language?"
}
# Clone llama.cpp
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
# Build it with Metal GPU acceleration. If your Mac has an Intel CPU, remove `LLAMA_METAL=1`
LLAMA_METAL=1 make
# Download model
export MODEL=llama-2-13b-chat.ggmlv3.q4_0.bin
wget "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/${MODEL}"
# Run the server (-t: number of CPU threads, -ngl: number of layers to offload to the GPU)
./server -t 8 -ngl 1 -m "${MODEL}"
# curl to test API
curl -X POST --location "http://127.0.0.1:8080/completion" \
  -H "Content-Type: application/json" \
  -d "{
    \"prompt\": \"What is Java Language?\",
    \"temperature\": 0.7
  }"
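# The same requests can be issued programmatically. Below is a minimal Python
# client sketch, assuming the server above is listening on 127.0.0.1:8080. The
# request fields ("prompt", "temperature", "content") and paths come from the
# requests in this gist; the shape of the server's JSON reply is not shown here,
# so the helper simply returns the decoded JSON as-is.

```python
import json
import urllib.request

BASE_URL = "http://127.0.0.1:8080"

def build_body(fields):
    # Serialize the request fields exactly as in the .http examples above.
    return json.dumps(fields).encode("utf-8")

def post(path, fields):
    # POST a JSON body to the server and decode the JSON response.
    req = urllib.request.Request(
        BASE_URL + path,
        data=build_body(fields),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())

# Usage (requires the server to be running):
#   post("/completion", {"prompt": "What is Java Language?", "temperature": 0.7})
#   post("/tokenize", {"content": "What is Java Language?"})
```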