Skip to content

Instantly share code, notes, and snippets.

rem Build the three Windows binaries (MSVC, clang, and gcc/MinGW targets).
make win64 && make winclang && make wingcc
rem Run each of the three binaries 3 times with 4 OpenMP threads against the 110M model.
for /L %i in (1,1,3) do @(for %x in (runmingw.exe rungcc.exe run.exe) do (set OMP_NUM_THREADS=4 && %x ../out/model110M.bin 0 0 "Once upon a time" 0))
rem Same benchmark, but first capture the active code page (token 4 of `chcp` output),
rem switch the console to UTF-8 (chcp 65001) for the run, then restore the original page.
for /f "tokens=4" %i in ('chcp') do @(chcp 65001 && @echo off && for /L %j in (1,1,3) do @(for %x in (runmingw.exe rungcc.exe run.exe) do @(set OMP_NUM_THREADS=4 && %x ../out/model110M.bin 0 0 "And away they went" 0)) && @echo on && chcp %i)
@twobob
twobob / base64_urlsafe.c
Created July 31, 2023 13:31
base64_urlsafe, roughly — for recoverable (reversible) filenames.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <direct.h>
#include <time.h>
/* Pack up to three bytes of `input`, starting at index `i`, into the low 24 bits
 * of `*triplet` (first byte in bits 16-23), zero-padding past the string end.
 * This is the standard grouping step for base64 encoding.
 *
 * Fixes vs. the original:
 *  - bytes are widened through `unsigned char` before shifting; with a plain
 *    (signed) `char`, any byte >= 0x80 would sign-extend and `<< 16` on a
 *    negative value is undefined behavior.
 *  - strlen() is computed once instead of up to twice per call.
 */
static inline void process_triplet(const char *input, size_t i, uint32_t *triplet) {
    const size_t len = strlen(input);
    const uint32_t b0 = (unsigned char)input[i];
    const uint32_t b1 = (i + 1 < len) ? (unsigned char)input[i + 1] : 0;
    const uint32_t b2 = (i + 2 < len) ? (unsigned char)input[i + 2] : 0;
    *triplet = (b0 << 16) | (b1 << 8) | b2;
}
@twobob
twobob / run_blocks.c
Last active July 28, 2023 03:31
This version will spit out story blocks as timestamped .txt files, as fast as possible, to a folder named inbox WINDOWS VERSION YMMV
/*
Inference for Llama-2 Transformer model in pure C.
This version will spit out story blocks as fast as possible to a folder called inbox
Metrics are shown per story, no doubt this could be faster.
Output using -O3 and no -fopenmp, with token-by-token reporting on the test machine gave 6-8 tok/s.
Compiling as outlined below and foregoing constant screen output nets between 80-330 tok/s on the same machine.
So between 10 - 55 times faster.
Example compile: (see README for more details)
@twobob
twobob / run.c
Last active July 25, 2023 14:16
gcc -o run run.c -lm -fopenmp -Wall -Wextra -Wpedantic -Wformat=2 -Wcast-align -Wconversion -Wsign-conversion -Wnull-dereference -g3 -Ofast
/*
Inference for Llama-2 Transformer model in pure C.
Example compile: (see README for more details)
$ gcc -O3 -o run run.c -lm
Then run with:
$ ./run
*/
#!/usr/bin/env python
# Based on: https://github.com/oobabooga/text-generation-webui/blob/main/convert-to-torch.py
# License: GNU Affero General Public License v3.0
#
#
# This script converts a transformers model using a custom shard size.
#
# Load a model from a directory and shard it into 2GB chunks:
# python reshard-causallm-model.py --src-model gpt-j-6B --out-path gpt-j-6B-sharded --torch_dtype float16 --max-shard-size 2GB
@twobob
twobob / cmp_and_test.py
Created July 22, 2023 23:12
Checks the char set unions and whether the file is valid JSONL
import json
def is_valid_jsonl(file_path):
    """Return True if every line of *file_path* parses as JSON, else False.

    Reads the file as UTF-8, one line at a time. On the first line that fails
    to parse, prints a diagnostic with its 1-based line number and stops.

    Fix vs. the original: the success path previously fell off the end of the
    function and returned None; it now returns True explicitly so callers can
    rely on a real boolean.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            try:
                json.loads(line)
            except json.JSONDecodeError:
                print(f'Invalid JSON on line {line_number}: {line}')
                return False
    return True
@twobob
twobob / delete_dirs.py
Created July 22, 2023 23:09
Delete all subdirectories in the specified directory, except for the largest n subdirectories
import os
import heapq
import shutil
import argparse
def get_directory_size(directory):
"""
Calculate the size of the directory.
Parameters:
@twobob
twobob / qlorsStack.py
Created July 21, 2023 20:23
QLORA from Stackexchange zim exports
import argparse
import os
import sys
import random
import codecs
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool
import colorama
from colorama import Fore
@twobob
twobob / Wget command
Created January 9, 2023 06:14
Wget command to download the default ckpt file for fine-tuning directly to google drive from colab
!wget https://model-server.zqevans2.workers.dev/jmann-small-190k.ckpt -O /content/drive/MyDrive/AI/models/jmann-small-190k.ckpt
@twobob
twobob / duall.sh
Created October 16, 2022 23:18
list disk usage without warnings as precis
du -cBM --max-depth=1 2> >(grep -v 'Permission denied') | grep -v 'cannot access' | sort -n