ollama build script with patch for llama3 llama.cpp BPE pretokenization issue
#!/bin/bash
cd "$(dirname "$0")" || exit 123
# builds a trimmed version of ollama (i.e. check the CPU flags and CUDA architectures below)
# in my case: AMD Ryzen | RTX4090
#
# to be placed and run in a directory cloned from ollama, with HEAD pointing to one of the tags below
# git clone git@github.com:ollama/ollama.git
# in that cloned repo-dir:
# EITHER
# git checkout v0.1.39
# OR
# git checkout v0.1.38
#
# https://github.com/ollama/ollama/blob/main/docs/development.md
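#
# optional sanity check for the CPU flags / CUDA architecture referenced above (illustrative only, not part of the build;
# assumes Linux with /proc/cpuinfo and an nvidia-smi recent enough to support the compute_cap query):
#   grep -o -w -E 'avx2?|f16c|fma' /proc/cpuinfo | sort -u           # CPU features used in OLLAMA_CUSTOM_CPU_DEFS below
#   nvidia-smi --query-gpu=compute_cap --format=csv,noheader         # e.g. "8.9" -> CMAKE_CUDA_ARCHITECTURES=89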
currenttag=$(git tag --points-at HEAD)
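# "git tag --points-at HEAD" prints the tag name (e.g. "v0.1.39") only when HEAD sits exactly on a tag;
# on any other commit it prints nothing and the script falls through to the TAG UNKNOWN branch below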
if [ "$currenttag" == "v0.1.38" ] ; then
echo TAG $currenttag FOUND inserting patch
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index 72c10ffc..c5b1c174 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4348,7 +4348,9 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "default") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
EOF
elif [ "$currenttag" == "v0.1.39" ] ; then
echo TAG $currenttag FOUND inserting patch
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index af1aede3..1d50f343 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4503,7 +4503,10 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "llama3" ||
tokenizer_pre == "llama-v3" ||
EOF
else
echo TAG UNKNOWN NOT CREATING PATCHES...
fi
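# The 06-llama.cpp.diff written above makes llama.cpp use the llama3 (BPE) pre-tokenizer instead of
# falling back to LLAMA_VOCAB_PRE_TYPE_DEFAULT when the model's pre-tokenizer type is missing,
# which is the workaround for the llama3 BPE pretokenization issue this gist is about.
# As far as I can tell, the "go generate" step below applies all llm/patches/*.diff files to the
# vendored llama.cpp before compiling it.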
export BUILD_ARCH="amd64"
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
# OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" CMAKE_CUDA_ARCHITECTURES=89 go generate ./...
# https://github.com/ollama/ollama/blob/1b0e6c9c0e5d53aa6110530da0befab7c95d1755/docs/gpu.md
CMAKE_CUDA_ARCHITECTURES=89 go generate ./...
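# 89 corresponds to CUDA compute capability 8.9 (Ada Lovelace, e.g. the RTX 4090 mentioned above);
# adjust for other GPUs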
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
export OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on -DCMAKE_CUDA_ARCHITECTURES=89"
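# note: per the advanced-cpu-settings doc above, OLLAMA_CUSTOM_CPU_DEFS is read by "go generate", not "go build",
# so export it (or use the commented one-liner further up) before the generate step if the custom CPU flags should take effect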
export VERSION=$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")
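# VERSION ends up as something like "0.1.39-0-g<sha>" (the git describe output with the leading "v" stripped);
# the -ldflags line below embeds it into the binary via github.com/ollama/ollama/version.Version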
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
go build .
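#
# the resulting binary is ./ollama; a quick smoke test (illustrative, assuming the build succeeded):
#   ./ollama --version        # should report the VERSION embedded above
#   ./ollama serve            # starts the server using the freshly built (patched) runners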