Skip to content

Instantly share code, notes, and snippets.

@savchenko
Forked from Artefact2/my-clone
Created October 14, 2024 03:55
Show Gist options
  • Save savchenko/6ab5d039033cd5674020cd7c67f88d09 to your computer and use it in GitHub Desktop.
Save savchenko/6ab5d039033cd5674020cd7c67f88d09 to your computer and use it in GitHub Desktop.
My llama.cpp quantize scripts
#!/usr/bin/env sh
# Download a model with huggingface-cli and convert it to a single GGUF file.
#
# Usage: my-clone <repo-uri>
#   <repo-uri> is handed to `huggingface-cli download`; its basename names the
#   download directory ../models/<base> and the output ../models/<base>-f16.gguf.
# Exits 0 when the output already exists or was produced, 1 on any failure.
if [ -z "$1" ]; then
    echo "Usage: $0 <repo-uri>" >&2
    exit 1
fi
URI=$1
BASE=$(basename "$1")
MODEL_DIR="../models/$BASE"
OUT="../models/$BASE-f16.gguf"

# Already converted: nothing to do.
[ -f "$OUT" ] && exit 0

# The download runs in a subshell, so the huggingface-cli environment activated
# here is not the one used by the conversion step below.
(. ../huggingface-cli/bin/activate && HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download "$URI" --local-dir "$MODEL_DIR" --cache-dir "$MODEL_DIR/.hf-cache" --exclude 'pytorch_model*' --exclude 'consolidated*' --resume-download) || exit 1

# Convert to f16 only when config.json says float16; anything else
# (including a missing value) is converted to f32.
# Note the output file is named "-f16.gguf" in both cases.
DTYPE=$(jq -r '.torch_dtype' < "$MODEL_DIR/config.json")
if [ "$DTYPE" = "float16" ]; then
    OUTTYPE=f16
else
    OUTTYPE=f32
fi

. ./venv/bin/activate
# Write to a .tmp name first so an interrupted conversion never leaves a
# partial file under the final name.
./convert.py --outtype "$OUTTYPE" --outfile "$OUT.tmp" "$MODEL_DIR" || exit 1
mv -f "$OUT.tmp" "$OUT" || exit 1

# The downloaded checkpoint is removed once the GGUF exists.
if [ -f "$OUT" ]; then
    rm -Rf "$MODEL_DIR"
fi
#!/usr/bin/env sh
# Run the full pipeline for one model: my-clone, then my-imatrix, then
# my-quantize. Stops with exit status 1 at the first step that fails.
#
# Usage: <this-script> <repo-uri>
if [ -z "$1" ]; then
    echo "Usage: $0 <repo-uri>" >&2
    exit 1
fi
URI=$1
BASE=$(basename "$1")
MODEL="../models/$BASE"

./my-clone "$URI" || exit 1
./my-imatrix "$MODEL" || exit 1
./my-quantize "$MODEL" || exit 1

# Empty the f16 file but leave it in place (presumably to free disk space while
# keeping the marker that makes my-clone exit early on a re-run).
truncate -s0 "$MODEL-f16.gguf"
#!/usr/bin/env sh
# Compute an importance matrix for a model and upload it.
#
# Usage: my-imatrix <model-prefix>
#   Reads  <model-prefix>-f16.gguf
#   Writes <model-prefix>-imatrix.dat (skipped entirely if it already exists)
# The exit status of a successful run is that of the final upload command.
if [ -z "$1" ]; then
    echo "Usage: $0 <model-prefix>" >&2
    exit 1
fi
BASE=$1
M=$(basename "$1")
# Quantization type of the model the imatrix is computed on.
IMQ=f16

if [ -f "$BASE-imatrix.dat" ]; then
    exit 0
fi

# Build the intermediate model if it is not there yet.
if [ ! -f "$BASE-$IMQ.gguf" ]; then
    make clean && make quantize || exit 1
    ./quantize "$BASE-f16.gguf" "$BASE-$IMQ.gguf.tmp" "$IMQ" && mv -f "$BASE-$IMQ.gguf.tmp" "$BASE-$IMQ.gguf" || exit 1
fi

make clean && make LLAMA_HIPBLAS=1 AMDGPU_TARGETS=gfx1030 imatrix || exit 1

# Stop horde-bridge for the duration of the run if it is currently active.
# is-active must NOT be given --quiet here: that flag suppresses the state
# string, which would leave HORDE empty and the comparisons below always false.
HORDE=$(systemctl --user is-active horde-bridge.service)
[ "x$HORDE" = "xactive" ] && systemctl --user stop horde-bridge.service

./imatrix -m "$BASE-$IMQ.gguf" -f wiki.train.raw --chunks 200 --no-ppl -ofreq 200 --no-mmap -ngl 7
STATUS=$?

# Restart the service before acting on the result, so a failed run does not
# leave it stopped.
[ "x$HORDE" = "xactive" ] && systemctl --user start horde-bridge.service
[ "$STATUS" -eq 0 ] || exit 1

mv -f imatrix.dat "$BASE-imatrix.dat" || exit 1

# Only delete the intermediate model when it is not the f16 file itself.
if [ "$IMQ" != "f16" ]; then
    rm -f "$BASE-$IMQ.gguf"
fi

. ../huggingface-cli/bin/activate
HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload "Artefact2/$M-GGUF" "$BASE-imatrix.dat"
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <unistd.h>
/*
 * Concatenate the input files, in argument order, into the last argument.
 *
 * Usage: prog file.000 file.001... merge.out
 *
 * The output file is created or truncated. Data is moved with
 * copy_file_range(), so nothing passes through a user-space buffer.
 *
 * Returns 0 on success, 1 on a usage error or any failed system call.
 */
int main(int argc, char** argv) {
    if(argc < 3) {
        fprintf(stderr, "Usage: %s file.000 file.001... merge.out\n", argv[0]);
        return 1;
    }

    int dest_fd = open(argv[argc-1], O_CREAT|O_TRUNC|O_WRONLY, 0666);
    if(dest_fd == -1) {
        perror("dest open()");
        return 1;
    }

    /* 2 GiB per call. Computed in size_t so the product does not overflow a
     * signed long on platforms where long is 32 bits. */
    const size_t CFRSIZE = (size_t)2048*1024*1024;

    for(int i = 1; i < argc-1; ++i) {
        int in_fd = open(argv[i], O_RDONLY);
        if(in_fd == -1) {
            perror("in open()");
            close(dest_fd);
            return 1;
        }

        ssize_t ret;
        /* copy_file_range() returns 0 on EOF; a short copy simply loops again,
         * continuing from the descriptors' current offsets. */
        while((ret = copy_file_range(in_fd, NULL, dest_fd, NULL, CFRSIZE, 0)) != 0) {
            if(ret == -1) {
                perror("copy_file_range()");
                close(in_fd);
                close(dest_fd);
                return 1;
            }
        }

        close(in_fd);
    }

    /* close() may be where a deferred write error is reported, so do not
     * claim success without checking it. */
    if(close(dest_fd) == -1) {
        perror("dest close()");
        return 1;
    }
    return 0;
}
#!/usr/bin/env sh
# Quantize a model to a fixed list of types and upload each result.
#
# Usage: my-quantize <model-prefix>
#   Reads  <model-prefix>-f16.gguf and <model-prefix>-imatrix.dat
#   Writes <model-prefix>-<TYPE>.gguf for every TYPE in the list below
# Types whose output file already exists are skipped. Uploads run in the
# background; the script does not wait for the last one before exiting.
if [ -z "$1" ]; then
    echo "Usage: $0 <model-prefix>" >&2
    exit 1
fi
BASE=$1
M=$(basename "$1")

make clean && make quantize || exit 1
. ../huggingface-cli/bin/activate

# The list is spelled out because POSIX sh does not perform brace expansion
# (Q{4,5}_K_{S,M} would be taken literally under e.g. dash).
for x in Q4_K_S Q4_K_M Q5_K_S Q5_K_M Q6_K IQ4_XS IQ3_M IQ3_S IQ3_XS IQ3_XXS IQ2_M IQ2_S IQ2_XS IQ2_XXS IQ1_S; do
    if [ ! -f "$BASE-$x.gguf" ]; then
        # Quantize into a .tmp name, then rename, so a partial file never
        # carries the final name.
        ./quantize --imatrix "$BASE-imatrix.dat" "$BASE-f16.gguf" "$BASE-$x.gguf.tmp" "$x" || exit 1
        mv -f "$BASE-$x.gguf.tmp" "$BASE-$x.gguf" || exit 1
        ./my-split "$BASE-$x.gguf" || exit 1

        # Let any upload that is still running finish before starting another.
        while pgrep -f huggingface-cli >/dev/null; do
            sleep 5
        done

        # Upload in the background so the next quantization can start
        # meanwhile. Each uploaded file is truncated to zero length afterwards.
        if [ -f "$BASE-$x.gguf.000" ]; then
            # my-split produced numbered parts: upload them one at a time.
            (HF_HUB_ENABLE_HF_TRANSFER=1 parallel --env HF_HUB_ENABLE_HF_TRANSFER --ungroup -n1 -j1 "huggingface-cli upload Artefact2/$M-GGUF {} && truncate -s0 {}" ::: "$BASE-$x.gguf".*) &
        else
            (HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload "Artefact2/$M-GGUF" "$BASE-$x.gguf" && truncate -s0 "$BASE-$x.gguf") &
        fi
    fi
done
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <unistd.h>
/*
 * Split a file into numbered parts of at most BSIZE bytes each.
 *
 * Usage: prog file.gguf
 *
 * Parts are written next to the source as file.gguf.000, file.gguf.001, ...
 * and each part's path is printed on stdout once it is complete. A source
 * smaller than BSIZE is left alone and no part is produced.
 *
 * Returns 0 on success (including "nothing to split"), 1 on a usage error or
 * any failed system call.
 */
int main(int argc, char** argv) {
    if(argc != 2) {
        fprintf(stderr, "Usage: %s file.gguf\n", argv[0]);
        return 1;
    }

    int src_fd = open(argv[1], O_RDONLY);
    if(src_fd == -1) {
        perror("src open()");
        return 1;
    }

    /* Measure the file, then rewind: copy_file_range() below reads from the
     * descriptor's current offset. */
    off_t src_len = lseek(src_fd, 0, SEEK_END);
    if(src_len == -1) {
        perror("lseek()");
        close(src_fd);
        return 1;
    }
    if(lseek(src_fd, 0, SEEK_SET) == -1) {
        perror("lseek()");
        close(src_fd);
        return 1;
    }

    /* Just under 50GB (NOT GiB). Computed in off_t/size_t so the products do
     * not overflow a signed long on platforms where long is 32 bits. */
    const off_t BSIZE = (off_t)372*128*1024*1024;
    const size_t CFRSIZE = (size_t)128*1024*1024;

    if(src_len < BSIZE) {
        close(src_fd);
        return 0; /* No need to split */
    }

    unsigned int n = 0;
    char dest_path[strlen(argv[1]) + 10];
    off_t done = 0; /* bytes copied so far, across all parts */
    int eof = 0;    /* set if the source ends earlier than src_len */

    while(!eof && done < src_len) {
        snprintf(dest_path, sizeof(dest_path), "%s.%03u", argv[1], n);
        int dest_fd = open(dest_path, O_CREAT|O_TRUNC|O_WRONLY, 0666);
        if(dest_fd == -1) {
            perror("dest open()");
            close(src_fd);
            return 1;
        }

        /* Fill this part with exactly BSIZE bytes unless the source ends.
         * copy_file_range() may copy fewer bytes than requested, so track
         * what was really transferred instead of assuming full chunks. */
        off_t left = BSIZE;
        while(left > 0) {
            size_t want = left < (off_t)CFRSIZE ? (size_t)left : CFRSIZE;
            ssize_t ret = copy_file_range(src_fd, NULL, dest_fd, NULL, want, 0);
            if(ret == -1) {
                perror("copy_file_range()");
                close(dest_fd);
                close(src_fd);
                return 1;
            }
            if(ret == 0) { /* EOF */
                eof = 1;
                break;
            }
            left -= ret;
            done += ret;
        }

        if(close(dest_fd) == -1) {
            perror("dest close()");
            close(src_fd);
            return 1;
        }
        printf("%s\n", dest_path);
        n += 1;
    }

    close(src_fd);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment