Skip to content

Instantly share code, notes, and snippets.

rem Build the three Windows binaries (MSVC, clang, and gcc/MinGW targets).
make win64 && make winclang && make wingcc
rem Run each of the three binaries 3 times with 4 OpenMP threads against the 110M model.
for /L %i in (1,1,3) do @(for %x in (runmingw.exe rungcc.exe run.exe) do (set OMP_NUM_THREADS=4 && %x ../out/model110M.bin 0 0 "Once upon a time" 0))
rem Same benchmark, but first capture the active code page (token 4 of `chcp` output),
rem switch the console to UTF-8 (chcp 65001) for the run, then restore the original page.
for /f "tokens=4" %i in ('chcp') do @(chcp 65001 && @echo off && for /L %j in (1,1,3) do @(for %x in (runmingw.exe rungcc.exe run.exe) do @(set OMP_NUM_THREADS=4 && %x ../out/model110M.bin 0 0 "And away they went" 0)) && @echo on && chcp %i)
@twobob
twobob / base64_urlsafe.c
Created July 31, 2023 13:31
base64_urlsafe, roughly — for recoverable (reversible) filenames.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <direct.h>
#include <time.h>
/* Pack up to three bytes of `input`, starting at index `i`, into the low 24 bits
 * of `*triplet` (first byte in bits 16-23), zero-padding past the string end.
 * This is the standard grouping step for base64 encoding.
 *
 * Fixes vs. the original:
 *  - bytes are widened through `unsigned char` before shifting; with a plain
 *    (signed) `char`, any byte >= 0x80 would sign-extend and `<< 16` on a
 *    negative value is undefined behavior.
 *  - strlen() is computed once instead of up to twice per call.
 */
static inline void process_triplet(const char *input, size_t i, uint32_t *triplet) {
    const size_t len = strlen(input);
    const uint32_t b0 = (unsigned char)input[i];
    const uint32_t b1 = (i + 1 < len) ? (unsigned char)input[i + 1] : 0;
    const uint32_t b2 = (i + 2 < len) ? (unsigned char)input[i + 2] : 0;
    *triplet = (b0 << 16) | (b1 << 8) | b2;
}
@twobob
twobob / run_blocks.c
Last active July 28, 2023 03:31
This version will spit out story blocks as timestamped .txt files, as fast as possible, to a folder named inbox WINDOWS VERSION YMMV
/*
Inference for Llama-2 Transformer model in pure C.
This version will spit out story blocks as fast as possible to a folder called inbox
Metrics are shown per story, no doubt this could be faster.
Output using -O3 and no -fopenmp, with token-by-token reporting on the test machine gave 6-8 tok/s.
Compiling as outlined below and foregoing constant screen output nets between 80-330 tok/s on the same machine.
So between 10 - 55 times faster.
Example compile: (see README for more details)
@twobob
twobob / run.c
Last active July 25, 2023 14:16
gcc -o run run.c -lm -fopenmp -Wall -Wextra -Wpedantic -Wformat=2 -Wcast-align -Wconversion -Wsign-conversion -Wnull-dereference -g3 -Ofast
/*
Inference for Llama-2 Transformer model in pure C.
Example compile: (see README for more details)
$ gcc -O3 -o run run.c -lm
Then run with:
$ ./run
*/
#!/usr/bin/env python
# Based on: https://github.com/oobabooga/text-generation-webui/blob/main/convert-to-torch.py
# License: GNU Affero General Public License v3.0
#
#
# This script converts a transformers model using a custom shard size.
#
# Load a model from a directory and shard it into 2GB chunks:
# python reshard-causallm-model.py --src-model gpt-j-6B --out-path gpt-j-6B-sharded --torch_dtype float16 --max-shard-size 2GB
@twobob
twobob / cmp_and_test.py
Created July 22, 2023 23:12
Checks the char set unions and whether the file is valid JSONL
import json
def is_valid_jsonl(file_path):
    """Return True if every line of *file_path* parses as JSON, else False.

    Reads the file as UTF-8, one line at a time. On the first line that fails
    to parse, prints a diagnostic with its 1-based line number and stops.

    Fix vs. the original: the success path previously fell off the end of the
    function and returned None; it now returns True explicitly so callers can
    rely on a real boolean.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            try:
                json.loads(line)
            except json.JSONDecodeError:
                print(f'Invalid JSON on line {line_number}: {line}')
                return False
    return True
@twobob
twobob / delete_dirs.py
Created July 22, 2023 23:09
Delete all subdirectories in the specified directory, except for the largest n subdirectories
import os
import heapq
import shutil
import argparse
def get_directory_size(directory):
"""
Calculate the size of the directory.
Parameters:
@twobob
twobob / qlorsStack.py
Created July 21, 2023 20:23
QLORA from Stackexchange zim exports
import argparse
import os
import sys
import random
import codecs
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool
import colorama
from colorama import Fore
@twobob
twobob / Wget command
Created January 9, 2023 06:14
Wget command to download the default ckpt file for fine-tuning directly to google drive from colab
!wget https://model-server.zqevans2.workers.dev/jmann-small-190k.ckpt -O /content/drive/MyDrive/AI/models/jmann-small-190k.ckpt
@twobob
twobob / duall.sh
Created October 16, 2022 23:18
list disk usage without warnings as precis
du -cBM --max-depth=1 2> >(grep -v 'Permission denied') | grep -v 'cannot access' | sort -n