Skip to content

Instantly share code, notes, and snippets.

@cloneofsimo
Last active September 25, 2025 09:05
Show Gist options
  • Save cloneofsimo/5b2561edf8b6d4216ee9fd58b9aca2e6 to your computer and use it in GitHub Desktop.
Save cloneofsimo/5b2561edf8b6d4216ee9fd58b9aca2e6 to your computer and use it in GitHub Desktop.
Very common stuff that I forget all the time and I really shouldn't

Do this on a server to allow SSH connections (TCP forwarding and tunnels) from IDEs

# Back up sshd_config, rewrite every line that mentions AllowTcpForwarding,
# GatewayPorts or PermitTunnel to "<directive> yes", then restart the sshd unit.
# The sed `c\` command only replaces lines that already match; it does not
# append a directive that is absent from the file.
sudo bash -c 'cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup && sed -i "/AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config && sed -i "/GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config && sed -i "/PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config' && sudo systemctl restart sshd

If that doesn't work (for example, the service unit is named `ssh` rather than `sshd`), use this instead:

# Back up sshd_config, then force the three forwarding directives to "yes".
# The ^#\? prefix matches a directive at the start of a line whether or not it
# is commented out; lines that do not match are left untouched.
sudo bash -c '
  cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup &&
  sed -i "/^#\?AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config
'

# … then restart (or reload) the correct service
# (this variant restarts the unit named "ssh")
sudo systemctl restart ssh 

Clean up code (format, remove unused imports, sort imports)

# Format the tree, strip unused imports, then sort imports.
black .
# NOTE(review): `./*` is expanded by the shell to top-level entries only and no
# recursive flag is passed — confirm whether nested packages should be covered.
autoflake --remove-all-unused-imports -i ./*
isort .

Kill every process that is holding the NVIDIA devices

# Send the default kill signal to every process that has an NVIDIA device node open.
# NR > 1 skips lsof's header row (whose second column is the literal "PID"),
# and sort -u collapses processes that hold several device files so each PID
# is signalled once.
lsof /dev/nvidia* | awk 'NR > 1 {print $2}' | sort -u | xargs -I {} kill {}

Check basic stuff

# Directories over 1 GB under the current directory (entries whose
# human-readable size contains "G"), sorted by size.
du -h | awk '$1 ~ /G/ && $1 > 1' | sort -h

Local git setup

# Set the default branch name and the commit identity in the user's global git config.
git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
# NOTE(review): "[email protected]" looks like an email-obfuscation placeholder left
# by the page this was copied from — substitute the real address before running.
git config --global user.email "[email protected]"

common initial machine setups

Install python3.11, venv, and make venv

#!/bin/bash
# Bootstrap a machine: git identity, apt packages, a Python 3.11 virtualenv at
# ~/py311cuda (auto-activated from ~/.bashrc), and the Python packages below.
cd

git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
# NOTE(review): "[email protected]" looks like an email-obfuscation placeholder —
# substitute the real address before running.
git config --global user.email "[email protected]"

# Prevent interactive prompts. The export covers commands run as this user;
# sudo's default env_reset policy does not forward it, so the variable is also
# passed explicitly on every sudo command line below.
export DEBIAN_FRONTEND=noninteractive

# Update package lists and add repository without prompts
sudo DEBIAN_FRONTEND=noninteractive apt update
sudo DEBIAN_FRONTEND=noninteractive apt install -y software-properties-common
sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:deadsnakes/ppa
sudo DEBIAN_FRONTEND=noninteractive apt install -y pdsh
sudo DEBIAN_FRONTEND=noninteractive apt install gh -y


# Install all required packages without prompts
sudo DEBIAN_FRONTEND=noninteractive apt install -y \
    pdsh \
    tmux \
    htop \
    git-lfs \
    ffmpeg \
    python3.11 \
    python3.11-venv \
    python3.11-distutils \
    libpython3.11-dev \
    python3.11-dev

# Setup git-lfs. Run as the invoking user (no sudo) so the LFS filters are
# written to this user's global git config rather than root's.
git lfs install --skip-repo

# Create Python virtual environment
python3.11 -m ensurepip --upgrade
python3.11 -m venv ~/py311cuda

# Add environment activation to bashrc if not already present
if ! grep -q "source ~/py311cuda/bin/activate" ~/.bashrc; then
    echo "source ~/py311cuda/bin/activate" >> ~/.bashrc
fi

# Activate the environment for the current session
source ~/py311cuda/bin/activate

# Upgrade pip and install packages
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir \
    torch \
    tqdm \
    click \
    transformers \
    datasets \
    wandb \
    plotly \
    pandas \
    torchvision \
    openai \
    backoff \
    accelerate \
    lovely-tensors \
    git+https://github.com/fal-ai-community/falgentoolbox.git \
    av \
    diffusers \
    boto3 \
    streamlit

# Reload bash configuration
source ~/.bashrc

echo "Installation complete! Python environment is activated and will auto-activate in new shell sessions."
# Sanity check: report CUDA visibility from the freshly installed torch
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}\nCUDA device count: {torch.cuda.device_count()}\nCurrent device: {torch.cuda.current_device()}')"

wandb login

Set the git identity, add the virtualenv activation command to ~/.bashrc, and activate it

# Set the commit identity in the user's global git config.
git config --global user.name "Simo Ryu"
# NOTE(review): "[email protected]" looks like an email-obfuscation placeholder —
# substitute the real address before running.
git config --global user.email "[email protected]"

ACTIVATE_CMD="source ~/py39cuda/bin/activate"

# Check if the command is already in .bashrc (-F fixed string, -x whole line)
if grep -Fxq "$ACTIVATE_CMD" ~/.bashrc
then
    echo "Virtual environment activation command already in .bashrc"
else
    # Add the command to .bashrc
    echo "$ACTIVATE_CMD" >> ~/.bashrc
    echo "Virtual environment activation command added to .bashrc"
fi

# Activate in the current shell. eval re-parses the string so that "~" is
# tilde-expanded; a bare $ACTIVATE_CMD would hand the literal path
# "~/py39cuda/bin/activate" to source, which then fails to find the file.
eval "$ACTIVATE_CMD"
@cloneofsimo
Copy link
Author

addkey

#!/bin/bash
# Installs an `addkey` command into /usr/local/bin that appends a public key
# to the calling user's ~/.ssh/authorized_keys.

# Generate the helper in $HOME first. The quoted 'EOF' delimiter keeps $1 and ~
# unexpanded here, so they are evaluated when addkey itself runs.
cat > ~/addkey << 'EOF'
#!/bin/bash

# Check if an argument is provided
if [ -z "$1" ]; then
  echo "Usage: addkey \"ssh-rsa ...\""
  exit 1
fi

# Ensure the .ssh directory exists
mkdir -p ~/.ssh

# Add the provided key to the authorized_keys file
echo "$1" >> ~/.ssh/authorized_keys

# Set the correct permissions
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys

echo "Key added successfully."
EOF

# Mark it executable, then move it onto the system-wide PATH
chmod +x ~/addkey
sudo mv ~/addkey /usr/local/bin/addkey

# Report whether the shell can now resolve the command
if ! command -v addkey &> /dev/null; then
  echo "There was an error installing the addkey command."
else
  echo "The addkey command has been successfully installed."
fi

@cloneofsimo
Copy link
Author

Claude filesplit

#!/usr/bin/env python3
"""
filesplitter.py - Split a text file into multiple files based on special comment lines.

Usage:
    python filesplitter.py <input_file> [output_dir]

The script looks for lines that start with:
- "# " followed by a path ending in ".py"
- "# File: " followed by a path
- "// File: " followed by a path

When found, all content following that line until the next file marker will be written
to the specified path.
"""

import os
import sys
import re


def ensure_directory_exists(filepath):
    """Create the parent directory of *filepath* if it doesn't exist.

    Args:
        filepath: Path of a file that is about to be written. Only its
            directory component is used; a bare filename is a no-op.

    Prints a "Created directory" message when the directory was missing.
    """
    directory = os.path.dirname(filepath)
    if not directory or os.path.exists(directory):
        return

    # exist_ok=True closes the gap between the existence check above and the
    # creation: if something else creates the directory in between, this no
    # longer raises FileExistsError.
    os.makedirs(directory, exist_ok=True)
    print(f"Created directory: {directory}")


def extract_filepath(line):
    """
    Extract the filepath from a marker line.

    Recognised markers, checked in this order:
      1. ``# File: <path>``   (any extension)
      2. ``// File: <path>``  (any extension)
      3. ``# <path>.py``      (bare Python-style comment)

    Args:
        line: A single line of text, with or without its trailing newline.

    Returns:
        The stripped path, or None if the line isn't a valid file marker.
        A ``# File:`` marker whose value starts with a code fence yields an
        empty string.
    """
    # Match "# File: path/to/file". This must be tested before the generic
    # "# something.py" form, which would otherwise also match and return the
    # "File: " prefix as part of the path.
    file_match = re.match(r'^\s*#\s*File:\s+(.+)$', line)
    if file_match:
        midstr = file_match.group(1).strip()
        if midstr.startswith('```'):
            # The capture is a single line, so dropping the first and last
            # "lines" of a fenced value leaves an empty string.
            return "\n".join(midstr.split("\n")[1:-1])
        return midstr

    # Match "//" style comments: "// File: path/to/file.ts"
    js_match = re.match(r'^\s*//\s+File:\s+(.+)$', line)
    if js_match:
        return js_match.group(1).strip()

    # Match Python-style comments: "# path/to/file.py"
    python_match = re.match(r'^\s*#\s+(.+\.py)$', line)
    if python_match:
        return python_match.group(1).strip()

    return None


def _write_split_file(output_dir, relative_path, lines):
    """Write *lines* to ``output_dir/relative_path``, creating its directory first."""
    full_path = os.path.join(output_dir, relative_path)
    ensure_directory_exists(full_path)

    with open(full_path, 'w', encoding='utf-8') as outfile:
        outfile.writelines(lines)

    print(f"Created file: {full_path}")


def split_file(input_file, output_dir='.'):
    """
    Split the input file into multiple files based on marker comments.

    Lines before the first marker are discarded. A marker that is followed by
    no content lines before the next marker (or end of input) produces no file.

    Args:
        input_file: Path to the input file
        output_dir: Base directory for output files (default: current directory)
    """
    current_file = None
    current_content = []
    file_count = 0

    print(f"Processing {input_file}...")

    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            filepath = extract_filepath(line)

            if filepath:
                # A new marker closes the section collected so far
                if current_file and current_content:
                    _write_split_file(output_dir, current_file, current_content)
                    file_count += 1

                # Start collecting for the new target
                current_file = filepath
                current_content = []
            elif current_file is not None:
                current_content.append(line)

    # The last section has no following marker to trigger its write
    if current_file and current_content:
        _write_split_file(output_dir, current_file, current_content)
        file_count += 1

    print(f"Completed! Split into {file_count} files.")


def main():
    """Command-line entry point: validate the arguments, then split the file.

    Exits with status 1 after printing a message when no input file is given
    or when the given input path does not exist.
    """
    argv = sys.argv
    if len(argv) <= 1:
        print(f"Usage: {argv[0]} <input_file> [output_dir]")
        sys.exit(1)

    input_file = argv[1]
    # The output directory is optional and defaults to the current directory
    if len(argv) > 2:
        output_dir = argv[2]
    else:
        output_dir = '.'

    if os.path.exists(input_file):
        split_file(input_file, output_dir)
        return

    print(f"Error: Input file '{input_file}' does not exist.")
    sys.exit(1)


if __name__ == "__main__":
    main()
    

@cloneofsimo
Copy link
Author

cloneofsimo commented Aug 7, 2025

fsdp

from torch.distributed import DeviceMesh
from torch.distributed._composable.fsdp import (
    fully_shard,
    CPUOffloadPolicy,
    MixedPrecisionPolicy,
)
import torch.distributed.checkpoint as dcp
from torch.distributed.checkpoint.state_dict import (
    StateDictOptions,
    get_model_state_dict,
)
from functools import lru_cache, partial, reduce
import torch.distributed as dist
from torch.distributed.device_mesh import DeviceMesh, init_device_mesh

import torch.distributed as dist
import datetime
import logging
import torch
import torch.nn as nn
from typing import Optional, Tuple, Union

def get_device_mesh():
    """Build the CUDA device mesh over all ranks of the default process group.

    Every rank is placed on the ``dp_shard`` dimension; ``dp_replicate`` and
    ``tp`` are fixed at 1. The mesh always carries the ``dp_replicate`` and
    ``dp_shard`` dimension names, and ``tp`` only when its size exceeds 1.

    Returns:
        The mesh created by ``init_device_mesh``.
    """
    world_size = dist.get_world_size()
    tp_size = 1
    dp_replicate = 1
    dp_shard = world_size

    assert (
        dp_replicate * dp_shard * tp_size == world_size
    ), f"dp_replicate * dp_shard * tp_size ({dp_replicate} * {dp_shard} * {tp_size}) != world_size ({world_size})"

    # dp_shard is included even when its size is 1 (single-process runs) so
    # that looking up mesh["dp_shard"] works for every world size.
    dims = [dp_replicate, dp_shard]
    names = ["dp_replicate", "dp_shard"]
    if tp_size > 1:
        dims.append(tp_size)
        names.append("tp")

    return init_device_mesh(
        "cuda", mesh_shape=tuple(dims), mesh_dim_names=tuple(names)
    )


def get_module(module, access_string):
    """Resolve a dotted attribute path (e.g. ``"a.b.c"``) starting at *module*."""
    target = module
    for attr_name in access_string.split("."):
        target = getattr(target, attr_name)
    return target


def set_module(module, access_string, value):
    """Assign *value* to the attribute named by a dotted path under *module*.

    Every component except the last is looked up with ``getattr``; the last
    component is set on the object reached that way.
    """
    *parent_path, leaf = access_string.split(".")
    owner = module
    for attr_name in parent_path:
        owner = getattr(owner, attr_name)
    setattr(owner, leaf, value)


def apply_fsdp(model, param_dtype=torch.bfloat16, reduce_dtype=torch.float32):
    """Shard *model* with ``fully_shard`` over the ``dp_shard`` mesh dimension.

    If the model has a ``blocks`` attribute, each entry is sharded on its own
    first; the model itself is always sharded afterwards.

    Args:
        model: Module to shard.
        param_dtype: Forwarded to ``MixedPrecisionPolicy(param_dtype=...)``.
            Defaults to ``torch.bfloat16``.
        reduce_dtype: Forwarded to ``MixedPrecisionPolicy(reduce_dtype=...)``.
            Defaults to ``torch.float32``.

    Returns:
        The value returned by ``fully_shard`` for the top-level model.
    """
    device_mesh = get_device_mesh()
    fsdp_config = {
        # Honour the caller's dtypes instead of hard-coding them here
        "mp_policy": MixedPrecisionPolicy(
            param_dtype=param_dtype,
            reduce_dtype=reduce_dtype,
            cast_forward_inputs=True,
        ),
        "mesh": device_mesh["dp_shard"],
    }

    # Check if model has blocks (transformer models)
    if hasattr(model, 'blocks'):
        # Apply block-level FSDP for transformer models
        for block_idx, block in enumerate(model.blocks):
            set_module(
                model,
                f"blocks.{block_idx}",
                fully_shard(
                    block, **fsdp_config, reshard_after_forward=True
                ),
            )

    # Apply model-level FSDP for all models
    model = fully_shard(model, **fsdp_config, reshard_after_forward=True)
    return model
    
def save_checkpoint(model, global_step, output_dir):
    """Save the model's state dict under ``output_dir/checkpoint-<global_step>``.

    Args:
        model: Model whose state dict is collected with ``get_model_state_dict``.
        global_step: Step number used in the checkpoint directory name.
        output_dir: Directory that will contain the checkpoint directory.
    """
    # Local import: this snippet's own import list does not bring in os.
    import os

    checkpoint_dir = os.path.join(output_dir, f"checkpoint-{global_step}")

    dcp.save(
        get_model_state_dict(model),
        checkpoint_id=checkpoint_dir,
    )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment