Very common stuff that I forget all the time and I really shouldn't

Do this on servers to enable SSH forwarding so IDEs (remote development over SSH) can connect

sudo bash -c 'cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup && sed -i "/AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config && sed -i "/GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config && sed -i "/PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config' && sudo systemctl restart sshd

If that doesn't work:

sudo bash -c '
  cp /etc/ssh/sshd_config /etc/ssh/sshd_config.backup &&
  sed -i "/^#\?AllowTcpForwarding/c\AllowTcpForwarding yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?GatewayPorts/c\GatewayPorts yes" /etc/ssh/sshd_config &&
  sed -i "/^#\?PermitTunnel/c\PermitTunnel yes" /etc/ssh/sshd_config
'

# … then restart (or reload) the correct service
sudo systemctl restart ssh 
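
To check that the settings took effect and to actually use the tunnel, something like this works (user, host, and ports are placeholders):

# on the server: print the effective sshd settings
sudo sshd -T | grep -iE 'allowtcpforwarding|gatewayports|permittunnel'

# on your machine: forward local port 8888 to port 8888 on the server
ssh -L 8888:localhost:8888 user@server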

Clean up Python code (format, strip unused imports, sort imports)

black .
autoflake --remove-all-unused-imports --recursive --in-place .
isort .

Kill stuck CUDA processes

lsof /dev/nvidia* 2>/dev/null | awk 'NR>1 {print $2}' | sort -u | xargs -r kill
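
If lsof isn't installed, nvidia-smi can list the offending PIDs instead (assumes a driver recent enough to support --query-compute-apps):

nvidia-smi --query-compute-apps=pid --format=csv,noheader | xargs -r kill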

Check disk usage

du -h . | awk '$1 ~ /G/ && $1+0 > 1' | sort -h   # directories over 1 GB under the current path
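
For a rough per-directory breakdown of the current path (GNU du), useful for finding where the space went:

du -h --max-depth=1 . 2>/dev/null | sort -h | tail -n 20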

Local git setup

git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

common initial machine setups

Install Python 3.11 and venv, then create a venv

#!/bin/bash
cd

git config --global init.defaultBranch main
git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

# Prevent interactive prompts
export DEBIAN_FRONTEND=noninteractive

# Update package lists and add repository without prompts
sudo apt update
sudo apt install -y software-properties-common
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt install -y gh


# Install all required packages without prompts
sudo apt install -y \
    pdsh \
    tmux \
    htop \
    git-lfs \
    ffmpeg \
    python3.11 \
    python3.11-venv \
    python3.11-distutils \
    libpython3.11-dev \
    python3.11-dev

# Setup git-lfs
sudo git lfs install --skip-repo

# Create Python virtual environment
python3.11 -m ensurepip --upgrade
python3.11 -m venv ~/py311cuda

# Add environment activation to bashrc if not already present
if ! grep -q "source ~/py311cuda/bin/activate" ~/.bashrc; then
    echo "source ~/py311cuda/bin/activate" >> ~/.bashrc
fi

# Activate the environment for the current session
source ~/py311cuda/bin/activate

# Upgrade pip and install packages
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir \
    torch \
    tqdm \
    click \
    transformers \
    datasets \
    wandb \
    plotly \
    pandas \
    torchvision \
    openai \
    backoff \
    accelerate \
    lovely-tensors \
    git+https://github.com/fal-ai-community/falgentoolbox.git \
    av \
    diffusers \
    boto3 \
    streamlit
    
# Reload bash configuration
source ~/.bashrc

echo "Installation complete! Python environment is activated and will auto-activate in new shell sessions."
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}\nCUDA device count: {torch.cuda.device_count()}\nCurrent device: {torch.cuda.current_device()}')"

wandb login

Set git config and auto-activate the venv from .bashrc

git config --global user.name "Simo Ryu"
git config --global user.email "[email protected]"

ACTIVATE_CMD="source ~/py311cuda/bin/activate"

# Check if the command is already in .bashrc
if grep -Fxq "$ACTIVATE_CMD" ~/.bashrc
then
    echo "Virtual environment activation command already in .bashrc"
else
    # Add the command to .bashrc
    echo "$ACTIVATE_CMD" >> ~/.bashrc
    echo "Virtual environment activation command added to .bashrc"
fi

eval "$ACTIVATE_CMD"

FSDP helpers (fully_shard + distributed checkpoint saving)

import datetime
import logging
import os
from functools import lru_cache, partial, reduce
from typing import Optional, Tuple, Union

import torch
import torch.distributed as dist
import torch.distributed.checkpoint as dcp
import torch.nn as nn
from torch.distributed._composable.fsdp import (
    CPUOffloadPolicy,
    MixedPrecisionPolicy,
    fully_shard,
)
from torch.distributed.checkpoint.state_dict import (
    StateDictOptions,
    get_model_state_dict,
)
from torch.distributed.device_mesh import DeviceMesh, init_device_mesh

def get_device_mesh():
    tp_size = 1
    dp_replicate = 1
    dp_shard = dist.get_world_size()

    assert (
        dp_replicate * dp_shard * tp_size == dist.get_world_size()
    ), f"dp_replicate * dp_shard * tp_size ({dp_replicate} * {dp_shard} * {tp_size}) != world_size ({dist.get_world_size()})"

    dims = []
    names = []
    if dp_replicate >= 1:
        dims.append(dp_replicate)
        names.append("dp_replicate")
    if dp_shard > 1:
        dims.append(dp_shard)
        names.append("dp_shard")
    if tp_size > 1:
        dims.append(tp_size)
        names.append("tp")
    dims = tuple(dims)
    names = tuple(names)

    return init_device_mesh("cuda", mesh_shape=dims, mesh_dim_names=names)


def get_module(module, access_string):
    names = access_string.split(sep=".")
    return reduce(getattr, names, module)


def set_module(module, access_string, value):
    names = access_string.split(sep=".")
    parent = reduce(getattr, names[:-1], module)
    setattr(parent, names[-1], value)


def apply_fsdp(model, param_dtype, reduce_dtype):
    device_mesh = get_device_mesh()
    fsdp_config = {
        "mp_policy": MixedPrecisionPolicy(
            param_dtype=param_dtype,
            reduce_dtype=reduce_dtype,
            cast_forward_inputs=True,
        ),
        "mesh": device_mesh["dp_shard"],
    }
    
    # Check if model has blocks (transformer models)
    if hasattr(model, 'blocks'):
        # Apply block-level FSDP for transformer models
        for block_idx, block in enumerate(model.blocks):
            set_module(
                model,
                f"blocks.{block_idx}",
                fully_shard(
                    block, **fsdp_config, reshard_after_forward=True
                ),
            )
    
    # Apply model-level FSDP for all models
    model = fully_shard(model, **fsdp_config, reshard_after_forward=True)
    return model
    
def save_checkpoint(model, global_step, output_dir):
    checkpoint_dir = os.path.join(output_dir, f"checkpoint-{global_step}")

    dcp.save(
        get_model_state_dict(model),
        checkpoint_id=checkpoint_dir,
    )
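
A minimal usage sketch for the helpers above, assuming the script is launched with torchrun (so LOCAL_RANK is set by the launcher), that MyModel is a placeholder for your own nn.Module, and that the checkpoint paths are illustrative. Loading back goes through dcp.load into a state dict from get_model_state_dict, then set_model_state_dict:

import os

import torch
import torch.distributed as dist
import torch.distributed.checkpoint as dcp
from torch.distributed.checkpoint.state_dict import set_model_state_dict

def main():
    # torchrun sets LOCAL_RANK / RANK / WORLD_SIZE for each process
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))

    model = MyModel().cuda()  # placeholder for your own module
    model = apply_fsdp(model, param_dtype=torch.bfloat16, reduce_dtype=torch.float32)

    # ... training loop ...

    save_checkpoint(model, global_step=1000, output_dir="./ckpts")

    # load back: fill a (sharded) state dict in place, then push it into the model
    state_dict = get_model_state_dict(model)
    dcp.load(state_dict, checkpoint_id="./ckpts/checkpoint-1000")
    set_model_state_dict(model, state_dict)

    dist.destroy_process_group()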
