Goals: add links to reasonable, good explanations of how stuff works. No hype, and no vendor content if possible. Practical first-hand accounts of models in prod are eagerly sought.

// 3D DOM viewer: copy-paste this into your console to visualise the DOM as a stack of solid blocks.
// You can also minify it and save it as a bookmarklet (https://www.freecodecamp.org/news/what-are-bookmarklets/)
(() => {
  const SHOW_SIDES = false;    // color sides of DOM nodes?
  const COLOR_SURFACE = true;  // color tops of DOM nodes?
  const COLOR_RANDOM = false;  // randomise color?
  const COLOR_HUE = 190;       // hue in HSL (https://hslpicker.com)
  const MAX_ROTATION = 180;    // set to 360 to rotate all the way round
  const THICKNESS = 20;        // thickness of layers
  const DISTANCE = 10000;      // ¯\\_(ツ)_/¯
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory | |
git clone https://github.com/myshell-ai/OpenVoice | |
cd OpenVoice | |
git clone https://huggingface.co/myshell-ai/OpenVoice | |
cp -r OpenVoice/* . | |
pip install whisper pynput pyaudio | |
""" | |
from openai import OpenAI | |
import time |
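# A possible next step (a sketch only; the port is LM Studio's default and the
# model name is an assumption, Ollama would need a real local model name):
# point the OpenAI client at the local server instead of api.openai.com.
client = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="local-model",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)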
Let's say we're trying to load a LLaMA model via AutoModelForCausalLM.from_pretrained with 4-bit quantization in order to run inference with it (a hedged sketch of the call follows the imports below):
python generate.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, LlamaTokenizerFast, LlamaForCausalLM
import transformers
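A hedged sketch of that load (the checkpoint id and the quantization settings are assumptions, not taken from the original):

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # assumed NF4 settings
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model_id = "huggyllama/llama-7b"            # hypothetical checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)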
Running nvidia-smi showed this required 11181MiB, at least to train on the prompt sequence lengths that occur at the start of the alpaca dataset (~337-token prompts).
You can get this down to about 10.9GB by modifying qlora.py to call torch.cuda.empty_cache() after PEFT has been applied to the loaded model and before training begins (a sketch of the placement follows).
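For illustration, assuming the usual peft flow (the LoRA hyperparameters here are placeholders, not the values qlora.py actually uses):

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)  # PEFT applied to the loaded model
torch.cuda.empty_cache()                    # free cached blocks before training starts
# ...build the trainer / training loop after this point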
All instructions are written assuming your command-line shell is bash.
Clone repository:
# This module is meant for direct use only. For API usage, please check SDA-TRAINER.
# Based on NVIDIA's demo
import argparse
import os

import onnx
import torch
from diffusers import UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel

from threads.trt.models import CLIP, UNet, VAE
from threads.trt.utilities import Engine
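As a rough illustration of the ONNX-export step such a script performs (a sketch only: the checkpoint id, shapes, and opset are assumptions, and the project's own Engine/UNet helpers are not used because their APIs aren't shown here):

class _UNetWrapper(torch.nn.Module):
    """Return a plain tensor so torch.onnx.export can trace the UNet cleanly."""
    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, sample, timestep, encoder_hidden_states):
        return self.unet(sample, timestep, encoder_hidden_states, return_dict=False)[0]


def export_unet_onnx(onnx_path="unet.onnx"):
    # hypothetical checkpoint; the real script's model source may differ
    unet = UNet2DConditionModel.from_pretrained(
        "runwayml/stable-diffusion-v1-5", subfolder="unet"
    ).eval()
    sample = torch.randn(2, 4, 64, 64)               # latent input
    timestep = torch.tensor([981])                   # diffusion timestep
    encoder_hidden_states = torch.randn(2, 77, 768)  # CLIP text embeddings
    torch.onnx.export(
        _UNetWrapper(unet),
        (sample, timestep, encoder_hidden_states),
        onnx_path,
        input_names=["sample", "timestep", "encoder_hidden_states"],
        output_names=["latent"],
        opset_version=17,
    )
    onnx.checker.check_model(onnx_path)  # path form also handles >2GB models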
from huggingface_hub import hf_hub_download
from flax.serialization import msgpack_restore
from safetensors.flax import save_file
import numpy as np

filename = hf_hub_download("gpt2", filename="flax_model.msgpack")
with open(filename, "rb") as f:
    data = f.read()
flax_weights = msgpack_restore(data)
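# A possible continuation (a sketch; the key separator and output filename are
# assumptions): flatten the nested Flax params, then write them out as safetensors.
from flax.traverse_util import flatten_dict

tensors = {k: np.asarray(v) for k, v in flatten_dict(flax_weights, sep=".").items()}
save_file(tensors, "flax_model.safetensors")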
# %%
import replicate

model = replicate.models.get("prompthero/openjourney")
version = model.versions.get("9936c2001faa2194a261c01381f90e65261879985476014a0a37a334593a05eb")
PROMPT = "mdjrny-v4 style 360 degree equirectangular panorama photograph, Alps, giant mountains, meadows, rivers, rolling hills, trending on artstation, cinematic composition, beautiful lighting, hyper detailed, 8 k, photo, photography"
output = version.predict(prompt=PROMPT, width=1024, height=512)

# %%
# download the image from the URL at output[0]
import requests
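# A possible continuation (a sketch; the output filename is an assumption).
response = requests.get(output[0], timeout=60)
response.raise_for_status()
with open("panorama.png", "wb") as f:
    f.write(response.content)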
# Got a bunch of .ckpt files to convert?
# Here's a handy script to take care of all that for you!
# Original .ckpt files are not touched!
# Make sure you have enough disk space! You are going to DOUBLE the size of your models folder!
#
# First, run:
# pip install torch torchsde==0.2.5 safetensors==0.2.5
#
# Place this file in the **SAME DIRECTORY** as all of your .ckpt files, open a command prompt for that folder, and run:
# python convert_to_safe.py
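For reference, a minimal sketch of what the conversion loop in such a script can look like (the "state_dict" key handling and the output naming are assumptions; the actual convert_to_safe.py may differ):

import glob
import torch
from safetensors.torch import save_file

for ckpt_path in glob.glob("*.ckpt"):
    checkpoint = torch.load(ckpt_path, map_location="cpu")
    # many Stable Diffusion .ckpt files nest the weights under "state_dict"
    state_dict = checkpoint.get("state_dict", checkpoint)
    # safetensors stores tensors only, so drop optimizer state and other extras
    tensors = {k: v.contiguous() for k, v in state_dict.items() if isinstance(v, torch.Tensor)}
    save_file(tensors, ckpt_path.rsplit(".", 1)[0] + ".safetensors")
    print(f"converted {ckpt_path}")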
import mmap
import torch
import json
import os
from huggingface_hub import hf_hub_download

def load_file(filename, device):
    with open(filename, mode="r", encoding="utf8") as file_obj:
        with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_READ) as m:
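            # A hedged sketch of one possible continuation (not the library's own
            # loader), assuming the safetensors layout: an 8-byte little-endian
            # header length, then a JSON header mapping tensor names to
            # {"dtype", "shape", "data_offsets"} relative to the end of the header.
            header_size = int.from_bytes(m[:8], "little")
            header = json.loads(m[8 : 8 + header_size])
            dtypes = {"F32": torch.float32, "F16": torch.float16, "BF16": torch.bfloat16}
            tensors = {}
            for name, info in header.items():
                if name == "__metadata__":
                    continue
                start, end = info["data_offsets"]
                raw = bytearray(m[8 + header_size + start : 8 + header_size + end])
                tensors[name] = (
                    torch.frombuffer(raw, dtype=dtypes[info["dtype"]])
                    .reshape(info["shape"])
                    .to(device)
                )
            return tensors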