ollama build script with patch for llama3 llama.cpp BPE pretokenization issue
#!/bin/bash
cd "$(dirname "$0")" || exit 123
# builds a trimmed version of ollama (i.e. check the CPU flags and CUDA architectures below)
# in my case: AMD Ryzen | RTX4090
#
# to be placed and run in a directory cloned from ollama, with HEAD pointing to one of the tags below
# git clone git@github.com:ollama/ollama.git
# in that cloned repo-dir:
# EITHER
# git checkout v0.1.39
# OR
# git checkout v0.1.38
#
# https://github.com/ollama/ollama/blob/main/docs/development.md
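#
# optional sanity check for the CPU flags / CUDA architecture referenced above (illustrative only, not part of the build;
# assumes Linux with /proc/cpuinfo and an nvidia-smi recent enough to support the compute_cap query):
#   grep -o -w -E 'avx2?|f16c|fma' /proc/cpuinfo | sort -u           # CPU features used in OLLAMA_CUSTOM_CPU_DEFS below
#   nvidia-smi --query-gpu=compute_cap --format=csv,noheader         # e.g. "8.9" -> CMAKE_CUDA_ARCHITECTURES=89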
currenttag=$(git tag --points-at HEAD)
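# "git tag --points-at HEAD" prints the tag name (e.g. "v0.1.39") only when HEAD sits exactly on a tag;
# on any other commit it prints nothing and the script falls through to the TAG UNKNOWN branch below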
if [ "$currenttag" == "v0.1.38" ] ; then
echo TAG $currenttag FOUND inserting patch
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index 72c10ffc..c5b1c174 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4348,7 +4348,9 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "default") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
EOF
elif [ "$currenttag" == "v0.1.39" ] ; then
echo TAG $currenttag FOUND inserting patch
cat << 'EOF' > llm/patches/06-llama.cpp.diff
diff --git a/llama.cpp b/llama.cpp
index af1aede3..1d50f343 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4503,7 +4503,10 @@ static void llm_load_vocab(
LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ LLAMA_LOG_WARN("%s: BLABLA OVERRIDE LLAMA3 BLALBLA \n", __func__);
+ // vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
} else if (
tokenizer_pre == "llama3" ||
tokenizer_pre == "llama-v3" ||
EOF
else
echo TAG UNKNOWN NOT CREATING PATCHES...
fi
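# The 06-llama.cpp.diff written above makes llama.cpp use the llama3 (BPE) pre-tokenizer instead of
# falling back to LLAMA_VOCAB_PRE_TYPE_DEFAULT when the model's pre-tokenizer type is missing,
# which is the workaround for the llama3 BPE pretokenization issue this gist is about.
# As far as I can tell, the "go generate" step below applies all llm/patches/*.diff files to the
# vendored llama.cpp before compiling it.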
export BUILD_ARCH="amd64"
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
# OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" CMAKE_CUDA_ARCHITECTURES=89 go generate ./...
# https://github.com/ollama/ollama/blob/1b0e6c9c0e5d53aa6110530da0befab7c95d1755/docs/gpu.md
CMAKE_CUDA_ARCHITECTURES=89 go generate ./...
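# 89 corresponds to CUDA compute capability 8.9 (Ada Lovelace, e.g. the RTX 4090 mentioned above);
# adjust for other GPUs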
# https://github.com/ollama/ollama/blob/main/docs/development.md#advanced-cpu-settings
export OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on -DCMAKE_CUDA_ARCHITECTURES=89"
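# note: per the advanced-cpu-settings doc above, OLLAMA_CUSTOM_CPU_DEFS is read by "go generate", not "go build",
# so export it (or use the commented one-liner further up) before the generate step if the custom CPU flags should take effect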
export VERSION=$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")
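# VERSION ends up as something like "0.1.39-0-g<sha>" (the git describe output with the leading "v" stripped);
# the -ldflags line below embeds it into the binary via github.com/ollama/ollama/version.Version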
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
go build .
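#
# the resulting binary is ./ollama; a quick smoke test (illustrative, assuming the build succeeded):
#   ./ollama --version        # should report the VERSION embedded above
#   ./ollama serve            # starts the server using the freshly built (patched) runners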