#!/usr/bin/env bash
# Working setup for the Manning "Chatbot with Llama" project.
# Tested on an AWS EC2 g5.xlarge instance with 80 GB of storage.
set -euo pipefail

# Install required build packages and Python libraries.
# -y keeps apt-get non-interactive so the script does not stall on prompts;
# DEBIAN_FRONTEND=noninteractive suppresses config dialogs during upgrade.
# (ACCEPT_EULA only applies to Microsoft packages and had no effect here.)
sudo DEBIAN_FRONTEND=noninteractive apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
# cmake (not just cmake-data) is needed for the binary used by the build step.
sudo apt-get install -y software-properties-common build-essential libopenblas-dev \
  ninja-build pkg-config cmake cmake-data clang nvidia-cuda-toolkit
sudo apt-get install -y git git-lfs curl wget zip unzip
git lfs install
sudo apt-get install -y python3 python3-pip python-is-python3
python -m pip install --upgrade --no-warn-script-location \
  pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette \
  pydantic-settings sentencepiece numpy torch safetensors tqdm
# Install Rust via rustup. "-s -- -y" runs the installer non-interactively
# with default settings; without it, piping into sh stops at the prompt.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
source "$HOME/.cargo/env"
# Compilation target for Wasm binaries that run under WasmEdge.
rustup target add wasm32-wasi
# Install WasmEdge with the WASI-NN LLM inference backend.
curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash
# The installer drops files under ~/.wasmedge; source its env file so the
# wasmedge binary and plugin paths are visible in this shell session.
source "$HOME/.wasmedge/env"
# Set up ngrok (used later to expose the local chatbot endpoint publicly).
curl -LO https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
sudo tar xvzf ngrok-v3-stable-linux-amd64.tgz -C /usr/local/bin
# Remove only the downloaded tarball; the bare "ngrok*" glob could also
# match unrelated files in the current directory.
rm ngrok-v3-stable-linux-amd64.tgz
# Replace YOUR-AUTHTOKEN with the token from your ngrok dashboard.
ngrok config add-authtoken YOUR-AUTHTOKEN
# Llama model setup - use the Hugging Face weights (requires accepting the
# Meta license). The clone prompts for your HF username and access token.
git clone https://huggingface.co/meta-llama/Llama-2-7b-hf
# Get the llama.cpp code and move the model data into its models dir.
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
# NOTE: the cloned directory is Llama-2-7b-hf; the original script tried to
# move a non-existent Llama-2-7b-chat-hf directory.
mv ../Llama-2-7b-hf models/
# Convert from HF/torch format to GGUF at f16 precision. --outfile pins the
# output name and location so the later quantize step can find it (the
# default output name lands inside the model dir, not the cwd).
python convert_hf_to_gguf.py models/Llama-2-7b-hf/ --outtype f16 \
  --outfile models/Llama-2-7b-hf/llama-2-7b-hf-f16.gguf
# If the filesystem is running out of space, drop the model checkout's
# bloated .git dir - it duplicates the multi-GB LFS weights. (The original
# removed llama.cpp's own .git, which is small and not the space hog.)
rm -rf models/Llama-2-7b-hf/.git
# Compile llama.cpp with CUDA support.
mkdir -p build   # -p: idempotent if the script is re-run
cd build
cmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/bin/nvcc
# Build in parallel across all available cores.
cmake --build . --config Release -j "$(nproc)"
# Quantize the f16 model down to Q5_K_M (good quality/size trade-off).
bin/llama-quantize ../models/Llama-2-7b-hf/llama-2-7b-hf-f16.gguf \
  ../models/Llama-2-7b-hf/llama-2-7b-hf-q5_k_m.gguf Q5_K_M
# --- GitHub gist page metadata (not part of the script) ---
# Last active: July 13, 2024 21:02
# Gist: beaugaines/7ce9dc386662e820a0cdab6410337267
# Description: Working setup for Manning 'Chatbot with Llama' project