-
-
Save savchenko/6ab5d039033cd5674020cd7c67f88d09 to your computer and use it in GitHub Desktop.
My llama.cpp quantize scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh | |
URI=$1 | |
BASE=$(basename $1) | |
[ -f ../models/$BASE-f16.gguf ] && exit 0 | |
(. ../huggingface-cli/bin/activate && HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download $URI --local-dir ../models/$BASE --cache-dir ../models/$BASE/.hf-cache --exclude 'pytorch_model*' --exclude 'consolidated*' --resume-download) || exit 1 | |
DTYPE=$(jq -r '.torch_dtype' < ../models/$BASE/config.json) | |
if [ "$DTYPE" = "float16" ]; then | |
OUTTYPE=f16 | |
else | |
OUTTYPE=f32 | |
fi | |
. ./venv/bin/activate | |
./convert.py --outtype $OUTTYPE --outfile ../models/$BASE-f16.gguf.tmp ../models/$BASE || exit 1 | |
mv -f ../models/$BASE-f16.gguf.tmp ../models/$BASE-f16.gguf || exit 1 | |
if [ -f ../models/$BASE-f16.gguf ]; then | |
rm -Rf ../models/$BASE | |
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh | |
URI=$1 | |
BASE=$(basename $1) | |
./my-clone $URI || exit 1 | |
./my-imatrix ../models/$BASE || exit 1 | |
./my-quantize ../models/$BASE || exit 1 | |
truncate -s0 ../models/$BASE-f16.gguf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh | |
BASE=$1 | |
M=$(basename $1) | |
IMQ=f16 | |
if [ -f $BASE-imatrix.dat ]; then | |
exit 0 | |
fi | |
if [ ! -f $BASE-$IMQ.gguf ]; then | |
make clean && make quantize || exit 1 | |
./quantize $BASE-f16.gguf $BASE-$IMQ.gguf.tmp $IMQ && mv -f $BASE-$IMQ.gguf.tmp $BASE-$IMQ.gguf | |
fi | |
make clean && make LLAMA_HIPBLAS=1 AMDGPU_TARGETS=gfx1030 imatrix || exit 1 | |
HORDE=$(systemctl --user is-active --quiet horde-bridge.service) | |
[ "x$HORDE" = "xactive" ] && systemctl --user stop horde-bridge.service | |
./imatrix -m $BASE-$IMQ.gguf -f wiki.train.raw --chunks 200 --no-ppl -ofreq 200 --no-mmap -ngl 7 || exit 1 | |
[ "x$HORDE" = "xactive" ] && systemctl --user start horde-bridge.service | |
mv -f imatrix.dat $BASE-imatrix.dat || exit 1 | |
if [ "$IMQ" != "f16" ]; then | |
rm -f $BASE-$IMQ.gguf | |
fi | |
. ../huggingface-cli/bin/activate | |
HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload Artefact2/$M-GGUF $BASE-imatrix.dat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#define _FILE_OFFSET_BITS 64 | |
#include <stdio.h> | |
#include <fcntl.h> | |
#include <unistd.h> | |
#include <string.h> | |
#include <unistd.h> | |
int main(int argc, char** argv) { | |
if(argc < 3) { | |
fprintf(stderr, "Usage: %s file.000 file.001... merge.out\n", argv[0]); | |
return 1; | |
} | |
int dest_fd = open(argv[argc-1], O_CREAT|O_TRUNC|O_WRONLY, 0666); | |
if(dest_fd == -1) { | |
perror("dest open()"); | |
return 1; | |
} | |
const size_t CFRSIZE = 2048*1024*1024l; | |
for(int i = 1; i < argc-1; ++i) { | |
int in_fd = open(argv[i], O_RDONLY); | |
if(in_fd == -1) { | |
perror("in open()"); | |
return 1; | |
} | |
ssize_t ret; | |
/* copy_file_range() returns 0 on EOF */ | |
while((ret = copy_file_range(in_fd, NULL, dest_fd, NULL, CFRSIZE, 0)) != 0) { | |
if(ret == -1) { | |
perror("copy_file_range()"); | |
return 1; | |
} | |
} | |
close(in_fd); | |
} | |
close(dest_fd); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh | |
BASE=$1 | |
M=$(basename $1) | |
make clean && make quantize || exit 1 | |
. ../huggingface-cli/bin/activate | |
for x in Q{4,5}_K_{S,M} Q6_K IQ4_XS IQ{3,2}_{M,S,XS,XXS} IQ1_S; do | |
if [ ! -f $BASE-$x.gguf ]; then | |
./quantize --imatrix $BASE-imatrix.dat $BASE-f16.gguf $BASE-$x.gguf.tmp $x || exit 1 | |
mv -f $BASE-$x.gguf.tmp $BASE-$x.gguf || exit 1 | |
./my-split $BASE-$x.gguf || exit 1 | |
while pgrep -f huggingface-cli >/dev/null; do | |
sleep 5 | |
done | |
if [ -f $BASE-$x.gguf.000 ]; then | |
(HF_HUB_ENABLE_HF_TRANSFER=1 parallel --env HF_HUB_ENABLE_HF_TRANSFER --ungroup -n1 -j1 "huggingface-cli upload Artefact2/$M-GGUF {} && truncate -s0 {}" ::: $BASE-$x.gguf.*) & | |
else | |
(HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload Artefact2/$M-GGUF $BASE-$x.gguf && truncate -s0 $BASE-$x.gguf) & | |
fi | |
fi | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#define _FILE_OFFSET_BITS 64 | |
#include <stdio.h> | |
#include <fcntl.h> | |
#include <unistd.h> | |
#include <string.h> | |
#include <unistd.h> | |
int main(int argc, char** argv) { | |
if(argc != 2) { | |
fprintf(stderr, "Usage: %s file.gguf\n", argv[0]); | |
return 1; | |
} | |
int src_fd = open(argv[1], O_RDONLY); | |
if(src_fd == -1) { | |
perror("src open()"); | |
return 1; | |
} | |
off_t src_len = lseek(src_fd, 0, SEEK_END); | |
if(src_len == -1) { | |
perror("lseek()"); | |
return 1; | |
} | |
lseek(src_fd, 0, SEEK_SET); | |
/* 50GB (NOT GiB) */ | |
const off_t BSIZE = 372*128*1024*1024l; /* Must be a multiple of CFRSIZE */ | |
const size_t CFRSIZE = 128*1024*1024l; | |
if(src_len < BSIZE) return 0; /* No need to split */ | |
unsigned int n = 0; | |
char dest_path[strlen(argv[1]) + 10]; | |
int dest_fd; | |
for(off_t i = 0; i < src_len; i += BSIZE) { | |
snprintf(dest_path, sizeof(dest_path)-1, "%s.%03u", argv[1], n); | |
dest_fd = open(dest_path, O_CREAT|O_TRUNC|O_WRONLY, 0666); | |
if(dest_fd == -1) { | |
perror("dest open()"); | |
return 1; | |
} | |
for(off_t k = 0; k < BSIZE; k += CFRSIZE) { | |
if(copy_file_range(src_fd, NULL, dest_fd, NULL, CFRSIZE, 0) == -1) { | |
perror("copy_file_range()"); | |
return 1; | |
} | |
} | |
close(dest_fd); | |
printf("%s\n", dest_path); | |
n += 1; | |
} | |
close(src_fd); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment