Very common stuff that I forget all the time and I really shouldn't

Do this on servers to enable SSH forwarding so IDEs (remote development over SSH) can connect

sudo bash -c 'cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup && sed -i "/AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config && sed -i "/GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config && sed -i "/PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config' && sudo systemctl restart sshd

If that doesn't work:

sudo bash -c '
  cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup &&
  sed -i "/^#\?AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config
'

# … then restart (or reload) the correct service
sudo systemctl restart ssh 
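
To check that the settings took effect and to actually use the tunnel, something like this works (user, host, and ports are placeholders):

# on the server: print the effective sshd settings
sudo sshd -T | grep -iE 'allowtcpforwarding|gatewayports|permittunnel'

# on your machine: forward local port 8888 to port 8888 on the server
ssh -L 8888:localhost:8888 user@server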

Clean up Python code (format, strip unused imports, sort imports)

black .
autoflake --remove-all-unused-imports --recursive --in-place .
isort .

Kill stuck CUDA processes

lsof /dev/nvidia* 2>/dev/null | awk 'NR>1 {print $2}' | sort -u | xargs -r kill
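
If lsof isn't installed, nvidia-smi can list the offending PIDs instead (assumes a driver recent enough to support --query-compute-apps):

nvidia-smi --query-compute-apps=pid --format=csv,noheader | xargs -r kill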

Check disk usage

du -h . | awk '$1 ~ /G/ && $1+0 > 1' | sort -h   # directories over 1 GB under the current path
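
For a rough per-directory breakdown of the current path (GNU du), useful for finding where the space went:

du -h --max-depth=1 . 2>/dev/null | sort -h | tail -n 20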

Local git setup

git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

common initial machine setups

Install Python 3.11 and venv, then create a venv

#!/bin/bash
cd

git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

# Prevent interactive prompts
export DEBIAN_FRONTEND=noninteractive

# Update package lists and add repository without prompts
sudo apt update
sudo apt install -y software-properties-common
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt install -y gh


# Install all required packages without prompts
sudo apt install -y \
    pdsh \
    tmux \
    htop \
    git-lfs \
    ffmpeg \
    python3.11 \
    python3.11-venv \
    python3.11-distutils \
    libpython3.11-dev \
    python3.11-dev

# Setup git-lfs
sudo git lfs install --skip-repo

# Create Python virtual environment
python3.11 -m ensurepip --upgrade
python3.11 -m venv ~/py311cuda

# Add environment activation to bashrc if not already present
if ! grep -q "source ~/py311cuda/bin/activate" ~/.bashrc; then
    echo "source ~/py311cuda/bin/activate" >> ~/.bashrc
fi

# Activate the environment for the current session
source ~/py311cuda/bin/activate

# Upgrade pip and install packages
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir \
    torch \
    tqdm \
    click \
    transformers \
    datasets \
    wandb \
    plotly \
    pandas \
    torchvision \
    openai \
    backoff \
    accelerate \
    lovely-tensors \
    git+https://github.com/fal-ai-community/falgentoolbox.git \
    av \
    diffusers \
    boto3 \
    streamlit
    
# Reload bash configuration
source ~/.bashrc

echo "Installation complete! Python environment is activated and will auto-activate in new shell sessions."
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}\nCUDA device count: {torch.cuda.device_count()}\nCurrent device: {torch.cuda.current_device()}')"

wandb login

Set git config and auto-activate the venv from .bashrc

git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

ACTIVATE_CMD="source ~/py311cuda/bin/activate"

# Check if the command is already in .bashrc
if grep -Fxq "$ACTIVATE_CMD" ~/.bashrc
then
    echo "Virtual environment activation command already in .bashrc"
else
    # Add the command to .bashrc
    echo "$ACTIVATE_CMD" >> ~/.bashrc
    echo "Virtual environment activation command added to .bashrc"
fi

eval "$ACTIVATE_CMD"

FSDP helpers (fully_shard + distributed checkpoint saving)

import datetime
import logging
import os
from functools import lru_cache, partial, reduce
from typing import Optional, Tuple, Union

import torch
import torch.distributed as dist
import torch.distributed.checkpoint as dcp
import torch.nn as nn
from torch.distributed._composable.fsdp import (
    CPUOffloadPolicy,
    MixedPrecisionPolicy,
    fully_shard,
)
from torch.distributed.checkpoint.state_dict import (
    StateDictOptions,
    get_model_state_dict,
)
from torch.distributed.device_mesh import DeviceMesh, init_device_mesh

def get_device_mesh():
    tp_size = 1
    dp_replicate = 1
    dp_shard = dist.get_world_size()

    assert (
        dp_replicate * dp_shard * tp_size == dist.get_world_size()
    ), f"dp_replicate * dp_shard * tp_size ({dp_replicate} * {dp_shard} * {tp_size}) != world_size ({dist.get_world_size()})"

    dims = []
    names = []
    if dp_replicate >= 1:
        dims.append(dp_replicate)
        names.append("dp_replicate")
    if dp_shard > 1:
        dims.append(dp_shard)
        names.append("dp_shard")
    if tp_size > 1:
        dims.append(tp_size)
        names.append("tp")
    dims = tuple(dims)
    names = tuple(names)

    return init_device_mesh("cuda", mesh_shape=dims, mesh_dim_names=names)


def get_module(module, access_string):
    names = access_string.split(sep=".")
    return reduce(getattr, names, module)


def set_module(module, access_string, value):
    names = access_string.split(sep=".")
    parent = reduce(getattr, names[:-1], module)
    setattr(parent, names[-1], value)


def apply_fsdp(model, param_dtype, reduce_dtype):
    device_mesh = get_device_mesh()
    fsdp_config = {
        "mp_policy": MixedPrecisionPolicy(
            param_dtype=param_dtype,
            reduce_dtype=reduce_dtype,
            cast_forward_inputs=True,
        ),
        "mesh": device_mesh["dp_shard"],
    }
    
    # Check if model has blocks (transformer models)
    if hasattr(model, 'blocks'):
        # Apply block-level FSDP for transformer models
        for block_idx, block in enumerate(model.blocks):
            set_module(
                model,
                f"blocks.{block_idx}",
                fully_shard(
                    block, **fsdp_config, reshard_after_forward=True
                ),
            )
    
    # Apply model-level FSDP for all models
    model = fully_shard(model, **fsdp_config, reshard_after_forward=True)
    return model
    
def save_checkpoint(model, global_step, output_dir):
    checkpoint_dir = os.path.join(output_dir, f"checkpoint-{global_step}")

    dcp.save(
        get_model_state_dict(model),
        checkpoint_id=checkpoint_dir,
    )
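
A minimal usage sketch for the helpers above, assuming the script is launched with torchrun (so LOCAL_RANK is set by the launcher), that MyModel is a placeholder for your own nn.Module, and that the checkpoint paths are illustrative. Loading back goes through dcp.load into a state dict from get_model_state_dict, then set_model_state_dict:

import os

import torch
import torch.distributed as dist
import torch.distributed.checkpoint as dcp
from torch.distributed.checkpoint.state_dict import set_model_state_dict

def main():
    # torchrun sets LOCAL_RANK / RANK / WORLD_SIZE for each process
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))

    model = MyModel().cuda()  # placeholder for your own module
    model = apply_fsdp(model, param_dtype=torch.bfloat16, reduce_dtype=torch.float32)

    # ... training loop ...

    save_checkpoint(model, global_step=1000, output_dir="./ckpts")

    # load back: fill a (sharded) state dict in place, then push it into the model
    state_dict = get_model_state_dict(model)
    dcp.load(state_dict, checkpoint_id="./ckpts/checkpoint-1000")
    set_model_state_dict(model, state_dict)

    dist.destroy_process_group()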
