#!/usr/bin/env bash
set -e

# example llama.cpp cmdstring:
# ./main -m ~/data/ml/LLaMA/LLaMA-7B-ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
### CONFIGURATION ###

# Set the following to match your location for compiled llama.cpp
LLAMA_DIR=~/src/llama.cpp
LLAMA_BIN=main

# Set the following to match the location of system prompt text files. Each text file should contain exactly one prompt.
SYSPROMPTS_DIR=~/scripts/prompts/sys
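
# For example, a system prompt file such as ${SYSPROMPTS_DIR}/pirate.txt (a
# hypothetical name) would contain a single prompt like:
#
#   You are a helpful assistant who answers every question in pirate speak.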
# Set this to match the folder where your LLaMA2 models are installed.
MODELS_DIR=~/data/ml/LLaMA2

### END CONFIGURATION ###
### MODEL LIBRARY ###

# The following vars define the various LLaMA2 models that are available for this script to use. Note that you will need to update the case statement under 'Load the user-requested model' below to match the variables here; see the commented example after this block.
MODEL_LLAMA2_13B_Q8_CHAT=llama-2-13b-chat/llama-2-13b-chat.ggmlv3.q8_0.bin
MODEL_LLAMA2_13B_Q41_CHAT=llama-2-13b-chat/llama-2-13b-chat.ggmlv3.q4_1.bin
MODEL_LLAMA2_13B_CHAT=${MODEL_LLAMA2_13B_Q8_CHAT}

MODEL_LLAMA2_70B_Q41_CHAT=llama-2-70b-chat/llama-2-70b-chat.ggmlv3.q4_1.bin
MODEL_LLAMA2_70B_CHAT=${MODEL_LLAMA2_70B_Q41_CHAT}

MODEL_NOUS_HERMES_13B_Q41=Nous-Hermes-Llama2-13b/nous-hermes-llama2-13b.ggmlv3.q4_1.bin
MODEL_NOUS_HERMES_13B_Q8=Nous-Hermes-Llama2-13b/nous-hermes-llama2-13b.ggmlv3.q8_0.bin
MODEL_NOUS_HERMES_13B=${MODEL_NOUS_HERMES_13B_Q8}

### END MODEL LIBRARY ###
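
# To register an additional model, add a variable here and a matching entry to
# the case statement below. The nickname and GGML filename shown are
# placeholders -- substitute whatever model file you actually have:
#
#   MODEL_MYMODEL_13B_Q4=my-model-13b/my-model-13b.ggmlv3.q4_0.bin
#
# ...and under 'Load the user-requested model':
#
#   mymodel-13b) modelfile=${MODEL_MYMODEL_13B_Q4}; promptformat=llama2;;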
# Deliberately bogus entry, used by the 'noexist' option below to exercise the missing-file error path.
MODEL_NOEXIST=kakakakaka
CMDSTRING="$LLAMA_DIR/$LLAMA_BIN --color -i -r \"User:\""

sysprompt=""
prompt=""
model=""
numtokens=256
# Show help and quit if requested (or if no arguments were given)
if [[ -z "$1" || "$1" =~ ^(-h|--help|-\?|/\?)$ ]]; then
    echo "A quick shell script that automates some of the more tedious aspects of invoking LLaMA2 models via llama.cpp."
    echo "Written by Lyjia, August 2023. https://gist.github.com/lyjia/0c1558d78740287b1bacb3c9575ed6ba"
    echo -e "\nTo run this script: \n\n\t$0 -m <model name from list below> -p <text prompt or path to text file with prompt> -s <system prompt or path to text file with system prompt> -n <number of tokens to generate, default: 256>"
    exit 0
fi
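
# Example invocations (assuming this script is saved as llama2.sh; the prompt
# file paths are placeholders for your own files):
#
#   ./llama2.sh -m 13b-chat -p "Why is the sky blue?"
#   ./llama2.sh -m 70b-chat -n 512 -p ~/scripts/prompts/question.txt -s ~/scripts/prompts/sys/helpful.txt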
while getopts m:p:s:n: flag
do
    case "${flag}" in
        m) model=${OPTARG};;
        p) prompt=${OPTARG};;
        s) sysprompt=${OPTARG};;
        n) numtokens=${OPTARG};;
    esac
done
# Load the user-requested model
case "${model}" in
    13b-chat)     modelfile=${MODEL_LLAMA2_13B_CHAT};     promptformat=llama2;;
    13b-q41-chat) modelfile=${MODEL_LLAMA2_13B_Q41_CHAT}; promptformat=llama2;;
    13b-q8-chat)  modelfile=${MODEL_LLAMA2_13B_Q8_CHAT};  promptformat=llama2;;
    70b-chat)     modelfile=${MODEL_LLAMA2_70B_CHAT};     promptformat=llama2; gqa=8;;
    70b-q41-chat) modelfile=${MODEL_LLAMA2_70B_Q41_CHAT}; promptformat=llama2; gqa=8;;
    nous-13b)     modelfile=${MODEL_NOUS_HERMES_13B};     promptformat=alpaca;;
    nous-13b-q41) modelfile=${MODEL_NOUS_HERMES_13B_Q41}; promptformat=alpaca;;
    nous-13b-q8)  modelfile=${MODEL_NOUS_HERMES_13B_Q8};  promptformat=alpaca;;
    noexist)      modelfile=${MODEL_NOEXIST};             promptformat=none;; # test entry for the missing-file error path
esac
if [[ -z "$modelfile" ]]; then | |
echo "Need to specify a LLaMA2 model file to use. Options are: 13b-chat, 13b-q41-chat, 13b-q8-chat, 70b-chat, 70b-q41-chat, nous-13b, nous-13b-q41, nous-13b-q8" | |
exit -1 | |
fi | |
if [[ -f "${MODELS_DIR}/$modelfile" ]]; then | |
echo "Using ${modelfile}..." | |
else | |
echo "Expected to find ${modelfile} but it doesn't exist!" | |
exit -2 | |
fi | |
# Load the user-requested prompt
if [[ -z "$prompt" ]]; then
    echo "Need to specify a prompt or prompt file with -p."
    exit 3
fi

if [[ -f "$prompt" ]]; then
    echo "Loading prompt from ${prompt}..."
    PROMPT_FILE=${prompt}
    MAINPROMPT=$(cat "${prompt}")
else
    MAINPROMPT=${prompt}
fi
# Load the user-requested system prompt
if [[ -z "$sysprompt" ]]; then
    echo "No system prompt given."
    FINALPROMPT=${MAINPROMPT}
else
    if [[ -f "$sysprompt" ]]; then
        echo "Loading system prompt from ${sysprompt}..."
        SYSTEMPROMPT=$(cat "${sysprompt}")
    else
        SYSTEMPROMPT=${sysprompt}
    fi

    # Wrap the prompts in the format the selected model expects. Note the final
    # command string already quotes the prompt, so no quotes are embedded here.
    if [[ $promptformat == "llama2" ]]; then
        FINALPROMPT="[INST]<<SYS>>${SYSTEMPROMPT}<</SYS>>${MAINPROMPT}[/INST]"
    elif [[ $promptformat == "alpaca" ]]; then
        printf -v FINALPROMPT '### Instruction:\n%s\n\n### Input: %s\n\n### Response:\n' "${MAINPROMPT}" "${SYSTEMPROMPT}"
    fi
fi
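
# For illustration only: with sysprompt "You are a pirate." and prompt "Hello",
# the assembled prompt would look roughly like this for each format:
#
#   llama2: [INST]<<SYS>>You are a pirate.<</SYS>>Hello[/INST]
#
#   alpaca: ### Instruction:
#           Hello
#
#           ### Input: You are a pirate.
#
#           ### Response: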
# Process any other necessary switches
if [[ "$gqa" ]]; then
    echo "Using GQA value of ${gqa}..."
    SWITCH_GQA="-gqa ${gqa}"
fi

if [[ "$numtokens" ]]; then
    echo "Output tokens: ${numtokens}..."
    SWITCH_NUMTOKENS="-n $numtokens"
fi
FINALCMDSTRING="${CMDSTRING} ${SWITCH_GQA} ${SWITCH_NUMTOKENS} -m ${MODELS_DIR}/${modelfile} -p \"${FINALPROMPT}\""

#echo About to execute: ${FINALCMDSTRING}
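
# For reference, with the default configuration and '-m 13b-chat' the assembled
# command looks roughly like this (prompt elided):
#
#   ~/src/llama.cpp/main --color -i -r "User:" -n 256 -m ~/data/ml/LLaMA2/llama-2-13b-chat/llama-2-13b-chat.ggmlv3.q8_0.bin -p "..."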
echo ===============================================================================
eval "${FINALCMDSTRING}"