Skip to content

Instantly share code, notes, and snippets.

@twobob
twobob / run.c
Last active July 25, 2023 14:16
gcc -o run run.c -lm -fopenmp -Wall -Wextra -Wpedantic -Wformat=2 -Wcast-align -Wconversion -Wsign-conversion -Wnull-dereference -g3 -Ofast
/*
Inference for Llama-2 Transformer model in pure C.
Example compile: (see README for more details)
$ gcc -O3 -o run run.c -lm
Then run with:
$ ./run
*/
#!/usr/bin/env python
# Based on: https://github.com/oobabooga/text-generation-webui/blob/main/convert-to-torch.py
# License: GNU Affero General Public License v3.0
#
#
# This script converts a transformers model using a custom shard size.
#
# Load a model from a directory and shard it into 2GB chunks:
# python reshard-causallm-model.py --src-model gpt-j-6B --out-path gpt-j-6B-sharded --torch_dtype float16 --max-shard-size 2GB
@twobob
twobob / cmp_and_test.py
Created July 22, 2023 23:12
Checks the character-set unions and whether the file is valid JSONL
import json
def is_valid_jsonl(file_path):
    """
    Report whether every line of a file parses as JSON.

    A blank line does not parse and is therefore reported as invalid.

    Parameters:
        file_path: Path of the UTF-8 encoded text file to check.

    Returns:
        True when every line parses with ``json.loads`` (an empty file
        qualifies); False at the first line that fails, after printing its
        1-based line number and content.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            try:
                json.loads(line)
            except json.JSONDecodeError:
                print(f'Invalid JSON on line {line_number}: {line}')
                return False
    # Explicit success value: falling off the end would return None, which
    # callers testing the result for truthiness would read as "invalid".
    return True
@twobob
twobob / delete_dirs.py
Created July 22, 2023 23:09
Delete all subdirectories in the specified directory, except for the largest n subdirectories.
import os
import heapq
import shutil
import argparse
def get_directory_size(directory):
"""
Calculate the size of the directory.
Parameters:
@twobob
twobob / qlorsStack.py
Created July 21, 2023 20:23
QLORA from Stackexchange zim exports
import argparse
import os
import sys
import random
import codecs
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool
import colorama
from colorama import Fore
@twobob
twobob / Wget command
Created January 9, 2023 06:14
Wget command to download the default ckpt file for fine-tuning directly to google drive from colab
# Fetch the jmann-small-190k checkpoint and write it to the given Google Drive path.
# NOTE(review): assumes Drive is already mounted at /content/drive — confirm before running.
!wget https://model-server.zqevans2.workers.dev/jmann-small-190k.ckpt -O /content/drive/MyDrive/AI/models/jmann-small-190k.ckpt
@twobob
twobob / duall.sh
Created October 16, 2022 23:18
list disk usage without warnings as precis
# List disk usage one directory level deep (sizes in 1M blocks, plus a grand total),
# sorted numerically by size.
# stderr is routed through a process substitution that drops 'Permission denied' lines;
# the second grep then drops any remaining lines containing 'cannot access'.
# NOTE(review): the >(...) process substitution needs a shell such as bash — confirm the target shell.
du -cBM --max-depth=1 2> >(grep -v 'Permission denied') | grep -v 'cannot access' | sort -n
@twobob
twobob / gist:68d8a55b2bf4a577c8afb431fb145f60
Created October 12, 2022 22:40
Downloading files and ckpt from Kaggle, minimal hassle
! pip install kaggle
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json
# AN INDIVIDUAL .ckpt FILE
LINK='omgsupportteam/dd-tb-fine-22-10-11-22-42-36'
FILENAME = 'last.ckpt'
! kaggle datasets download $LINK -f $FILENAME
@twobob
twobob / left.py
Created October 8, 2022 01:45
print how long you have left in your daily colab session. Naive counter
import sys, json
from dateutil.parser import parse
from datetime import datetime, timezone, timedelta
# Scan the Colab app log and, for every line containing " started",
# print how much time has passed since the timestamp recorded on that line.
with open("/var/colab/app.log", 'r') as fileData:
    for textline in fileData:
        if " started" in textline:
            # Each matching line is decoded as JSON and its 'time' field parsed
            time = parse(json.loads(textline)['time'])
            # Use the parsed timestamp's tzinfo so the subtraction below is valid
            now = datetime.now(time.tzinfo)
            # Start time plus four hours
            later = time + timedelta(hours=4)
            print(now-time, "session started")
@twobob
twobob / gist:f36342b286006c3830409387d88cc482
Created October 5, 2022 17:58
Pruning code for Dance Diffusion. All props to Waifu.
import torch
def prune_it(p):
print(f"prunin' in path: {p}")
size_initial = os.path.getsize(p)
nsd = dict()
sd = torch.load(p, map_location="cpu")
print(sd.keys())
for k in sd.keys():
if k != "optimizer_states":
nsd[k] = sd[k]