Skip to content

Instantly share code, notes, and snippets.

@cloneofsimo
cloneofsimo / lance.py
Created May 30, 2024 20:39
lance dataset concurrent writes?
import lance
import pyarrow as pa
import numpy as np
import time
import os
import multiprocessing as mp
def producer(N =1 ):
yield pa.RecordBatch.from_arrays([
@cloneofsimo
cloneofsimo / justreadwds.py
Created June 1, 2024 10:07
Read? I get about 1000 img / s
import os
import json
from PIL import Image
import logging
from torch.utils.data import DataLoader
import webdataset as wds
import argparse
from tqdm import tqdm
import numpy as np
from torchvision import transforms
@cloneofsimo
cloneofsimo / low-rank-transpose-inv-similarity.py
Created June 3, 2024 17:14
Is your backprop secretly a linear solver?
# motivated by https://x.com/yaroslavvb/status/1797662470859071892
import matplotlib.pyplot as plt
import numpy as np
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    The result is the dot product of ``v1`` and ``v2`` divided by the
    product of their norms as computed by ``np.linalg.norm``.

    Args:
        v1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        v2: Second vector, same length as ``v1``.

    Returns:
        The ratio ``dot(v1, v2) / (norm(v1) * norm(v2))``.

    Note:
        There is no special handling for zero-norm inputs; in that case the
        denominator is zero and the division is left to NumPy.
    """
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
def stable_rank(matrix):
s = np.linalg.svd(matrix, compute_uv=False)
@cloneofsimo
cloneofsimo / merge_mds_fast.py
Created June 24, 2024 00:59
Multiprocessed MDS data merging to NFS; because writes are asynchronous, this is faster
import os
import json
from glob import glob
from tqdm import tqdm
from multiprocessing import Pool, Manager, cpu_count
def with_id(basename: str, shard_id: int) -> str:
    """Return ``basename`` with its second dot-separated field replaced by the shard id.

    The name is split on ``.``, the field at index 1 is overwritten with
    ``shard_id`` zero-padded to at least seven digits, and the fields are
    joined back with ``.``. All other fields are kept unchanged.

    Args:
        basename: Dot-separated file name; must contain at least one ``.``.
        shard_id: Integer written into the second field.

    Returns:
        The rewritten file name.

    Raises:
        IndexError: If ``basename`` contains no ``.`` (there is no second
            field to replace).
    """
    parts = basename.split(".")
    # Width 7 with zero fill; wider ids are not truncated.
    parts[1] = f"{shard_id:07}"
    return ".".join(parts)
@cloneofsimo
cloneofsimo / sample.py
Created July 1, 2024 20:04
self-contained sampling code
## MM-DiT model, as proposed in the SD3 paper.
# I've tried to make this follow the muP work, so that the model scales in the maximal feature-learning regime.
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torch
import json
from PIL import Image
from torch.utils.data import Dataset
from diffusers.models import AutoencoderKL
from streaming import MDSWriter
import logging
import time
import os
import torch
import json
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from diffusers.models import AutoencoderKL
from streaming import MDSWriter
import logging
import time
@cloneofsimo
cloneofsimo / radical_image_descriptions.json
Last active September 8, 2024 16:45
Set of wildly complex image descriptions & atomic factual statements.
[
{
"description": "a flamboyant octopus dressed in a tuxedo is hosting an undersea tea party on a coral reef, complete with tiny teacups and saucers made from seashells. Each of the octopus's eight arms is elegantly balancing a different teapot, pouring colorful and sparkly liquid into the cups. Surrounding the octopus are a variety of sea creatures dressed in formal wear: a well-dressed clownfish is toasting with a miniature glass, a sea turtle wears a monocle and is engaged in deep conversation with a stylishly-attired seahorse, and a group of jellyfish float nearby, their translucent bodies glowing softly in the ambient light. Coral formations around them resemble extravagant table decorations, draped in strands of pearls and glittering seaweed, while a vibrant school of fish swims by, seemingly mesmerized by the spectacle. The scene is filled with a sense of whimsy and elegance, illuminated by streams of sunlight filtering down from above, creating a magical underwater atmosphere.",
"
@cloneofsimo
cloneofsimo / aurav02.py
Created July 24, 2024 14:06
AuraFlow v0.2, sampling that handles self-unconditioning CFG
#### Inference utils
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from diffusers.image_processor import VaeImageProcessor
from diffusers.models import AutoencoderKL
@cloneofsimo
cloneofsimo / vibecheckgen.py
Created July 27, 2024 13:23
vibecheckgen.py
import requests
import json
import base64
from PIL import Image
from io import BytesIO
import os
import openai
from openai import OpenAI
import fal_client