Skip to content

Instantly share code, notes, and snippets.

View antferdom's full-sized avatar

A.J antferdom

View GitHub Profile
import torch
from transformers import AutoTokenizer, RobertaForMaskedLM, AutoConfig
from transformers.pipelines.base import infer_framework_load_model
from os import path
from huggingface_hub import hf_hub_download
def compare_models(pt_mdl, sf_mdl):
# A blend of convert.py's generalized check_final_model with concrete usage example to demonstrate
sf_dict = sf_mdl.state_dict()
@cloneofsimo
cloneofsimo / flash.py
Created June 22, 2023 07:51
FlashAttention comparison
import pytest
import torch
import triton
import triton.language as tl
@triton.jit
def _fwd_kernel(
Q, K, V, sm_scale,
@philipturner
philipturner / CalculateDiffusion.swift
Last active July 20, 2025 10:28
Calculate the number of floating-point operations in Stable Diffusion, and how those operations are distributed among layers
//
// main.swift
// CalculateDiffusion
//
// Created by Philip Turner on 6/2/23.
//
import Foundation
import QuartzCore
import MetalPerformanceShadersGraph
@anupambhatnagar
anupambhatnagar / docker_cheatsheet.sh
Last active January 16, 2024 20:40
Commonly used Docker commands
# Launch container and enter shell
docker run --rm -it <image_name>
# Create image from docker file
docker build -t <image_name:tag> -f /path/to/dockerfile .
# Create image from running container
docker commit <container_id> <account_name/image_name:tag>
# Push image to docker hub
@geohot
geohot / test_allreduce.py
Created March 14, 2023 07:19
Test Bandwidth of all reduce
import os
import sys
import time
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
def all_reduce_latency(nbytes, rank):
buf = torch.randn(nbytes // 4).cuda(rank)
@henryliu5
henryliu5 / gds.py
Created March 12, 2023 00:28
GPUDirect Storage Benchmarking Script
import subprocess
import argparse
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('seaborn')
load_type = {'SEQ_READ': 0, 'SEQ_WRITE':1, 'RAND_READ': 2, 'RAND_WRITE': 3}
# transfer_type = {'Storage->GPU (GDS)': 0, 'Storage->CPU': 1, 'Storage->CPU->GPU': 2, 'Storage->CPU-GPU_ASYNC': 3, 'Storage->PAGE_CACHE->CPU->GPU': 4, 'Storage->GPU_ASYNC': 5, 'STORAGE->GPU_BATCH': 6}
@liviaerxin
liviaerxin / README.md
Last active December 12, 2025 19:37
FastAPI and Uvicorn Logging #python #fastapi #uvicorn #logging

FastAPI and Uvicorn Logging

When running a FastAPI app, all the logs in the console come from Uvicorn, and they lack timestamps and other useful information. Because Uvicorn uses Python's logging module, we can override the Uvicorn logging formatter by applying a new logging configuration.

Meanwhile, you can unify your endpoints' logging with the Uvicorn logging by configuring all of them in the config file log_conf.yaml.

Before overriding:

uvicorn main:app --reload
import sys
import os
import torch
from safetensors.torch import load_file
import datetime
from omegaconf import OmegaConf
sys.path.append(os.path.abspath(os.path.join(os.path.dirname( __file__ ), "repositories/stable-diffusion-stability-ai")))
from ldm.modules.diffusionmodules.model import Model
from ldm.util import instantiate_from_config
@Narsil
Narsil / pure_torch.py
Created November 10, 2022 15:06
Loading a safetensors file with pure torch only
import mmap
import torch
import json
import os
from huggingface_hub import hf_hub_download
def load_file(filename, device):
with open(filename, mode="r", encoding="utf8") as file_obj:
with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_READ) as m:
@serjtroshin
serjtroshin / bigscience_tokenizer.ipynb
Last active January 25, 2025 15:07
Preliminary analysis of multilang tokenizer
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.