Skip to content

Instantly share code, notes, and snippets.

@vroomfondel
Created May 25, 2024 14:56
Show Gist options
  • Save vroomfondel/47b5756c5b6fca6d22ce0e3a6e2fb048 to your computer and use it in GitHub Desktop.
Save vroomfondel/47b5756c5b6fca6d22ce0e3a6e2fb048 to your computer and use it in GitHub Desktop.
ollama build script with patch for llama3 llama.cpp BPE pretokenization issue
#!/bin/bash
# Run from the script's own directory (the cloned ollama repo root).
# Quote the command substitution so a path containing spaces or glob
# characters does not word-split (SC2046); bail out hard if cd fails.
cd "$(dirname "$0")" || exit 123
# builds a trimmed version of ollama (i.e. check the CPU flags and CUDA architectures below)
# in my case: AMD Ryzen | RTX4090
#
# to be run inside a directory cloned from ollama, with HEAD pointing to some tag
# git clone [email protected]:ollama/ollama.git
# in that cloned repo-dir:
# EITHER
# git checkout v0.1.39
# OR
# git checkout v0.1.38
#
# https://github.com/ollama/ollama/blob/main/docs/development.md
# Detect which release tag HEAD points at; the BPE-pretokenizer override
# patch differs per tag because the llama.cpp hunk moved between
# v0.1.38 (line ~4348) and v0.1.39 (line ~4503).
currenttag=$(git tag --points-at HEAD)
if [ "$currenttag" == "v0.1.38" ] ; then
echo "TAG $currenttag FOUND inserting patch"
# Quoted 'EOF' delimiter => heredoc body is written literally (no expansion).
# BUGFIX: the first heredoc line used to read "cat diff --git ...", which
# produced a malformed diff file that git apply / patch cannot consume;
# the stray "cat " prefix is removed so the file starts with "diff --git".
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index 72c10ffc..c5b1c174 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4348,7 +4348,9 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "default") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
EOF
elif [ "$currenttag" == "v0.1.39" ] ; then
echo "TAG $currenttag FOUND inserting patch"
# Same override for v0.1.39; the duplicated commented-out line below is
# intentional — the hunk header declares 10 new lines, and removing it
# would desynchronize the patch.
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index af1aede3..1d50f343 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4503,7 +4503,10 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "llama3" ||
tokenizer_pre == "llama-v3" ||
EOF
else
echo "TAG UNKNOWN NOT CREATING PATCHES..."
fi
# Target architecture for the Go build.
export BUILD_ARCH="amd64"
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
# OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" CMAKE_CUDA_ARCHITECTURES=89 go generate ./...
#https://github.com/ollama/ollama/blob/1b0e6c9c0e5d53aa6110530da0befab7c95d1755/docs/gpu.md
# Generate native code for CUDA compute capability 8.9 (RTX 4090) only.
# Abort instead of building a stale binary if code generation fails.
CMAKE_CUDA_ARCHITECTURES=89 go generate ./... || exit 1
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
# NOTE(review): this export happens AFTER 'go generate' has already run, so
# it cannot influence the generate step above — confirm whether it is meant
# to move before the generate call or only matters to 'go build'.
export OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on -DCMAKE_CUDA_ARCHITECTURES=89"
# Version string from git, with the leading "v" stripped. Declaration and
# assignment are split so 'export' does not mask a failing pipeline (SC2155).
VERSION=$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")
export VERSION
# Strip debug info (-w -s) and embed version/release mode into the binary.
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
go build .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment