# noble for Ubuntu 24.04, jammy for 22.04
sudo add-apt-repository -y -s "deb http://security.ubuntu.com/ubuntu noble main universe"
sudo apt update
# download and setup
wget https://repo.radeon.com/amdgpu-install/6.2.3/ubuntu/noble/amdgpu-install_6.2.60203-1_all.deb
sudo apt install ./amdgpu-install_6.2.60203-1_all.deb
sudo amdgpu-install -y --usecase=graphics,rocm
sudo usermod -a -G render,video $LOGNAME
# confirm with rocminfo
sudo apt install rocminfo
rocminfo
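If you prefer scripting the check, a minimal Python sketch (assuming rocminfo is on PATH) that filters the reported gfx targets:
# print every line of rocminfo output that mentions a gfx target
import subprocess

out = subprocess.run(["rocminfo"], capture_output=True, text=True).stdout
for line in out.splitlines():
    if "gfx" in line:
        print(line.strip())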
By default the iGPU only has access to about 512 MB of VRAM (the exact amount depends on the device and BIOS setup). To increase it:
# open grub file in editor
sudo nano /etc/default/grub
# update the kernel command line by adding amdgpu.vm_size (size depends on total shared RAM; half or a quarter of it, for example)
GRUB_CMDLINE_LINUX_DEFAULT='... amdgpu.vm_size=16G ...'
# apply changes
sudo update-grub
reboot
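After rebooting, one way to verify the change is to read the memory counters the amdgpu driver exposes via sysfs. A minimal sketch; the sysfs file names and the card index are assumptions (it may be card1 on some systems):
# read the VRAM/GTT totals reported by the amdgpu driver (paths are an assumption)
from pathlib import Path

dev = Path("/sys/class/drm/card0/device")
for name in ("mem_info_vram_total", "mem_info_gtt_total"):
    f = dev / name
    if f.exists():
        print(name, round(int(f.read_text()) / 2**30, 2), "GiB")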
The AMD Ryzen 7 PRO 8700GE's iGPU is gfx1103, which is not officially supported by ROCm, but treating it as gfx1100 works:
# optional debugging aids: device-side assertions and serialized kernel launches
export TORCH_USE_HIP_DSA=1
export AMD_SERIALIZE_KERNEL=3
export ROCM_PATH=/opt/rocm-6.2.3 # or the /opt/rocm symlink (both work)
export GFX_ARCH=gfx1100
export ROCM_VERSION=6.2.3
export PYTORCH_ROCM_ARCH="gfx1100"
export HSA_OVERRIDE_GFX_VERSION=11.0.0
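The same override can also be set from Python, as long as it happens before torch is imported (the ROCm runtime reads it at initialization). A sketch, assuming the PyTorch ROCm wheel from the next step is already installed:
import os

# must be set before importing torch, since the ROCm runtime reads it at init
os.environ.setdefault("HSA_OVERRIDE_GFX_VERSION", "11.0.0")
os.environ.setdefault("PYTORCH_ROCM_ARCH", "gfx1100")

import torch

props = torch.cuda.get_device_properties(0)
print(props.name, getattr(props, "gcnArchName", "n/a"))  # gcnArchName is present on ROCm builds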
# create and activate virtual environment
python3 -m venv rocm_torch
source rocm_torch/bin/activate
# install the PyTorch nightly build for ROCm 6.2
python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.2
# use the stable ROCm 6.2 wheels if available; alternatively, install for ROCm 6.1 from https://download.pytorch.org/whl/rocm6.1 or the nightly from https://download.pytorch.org/whl/nightly/rocm6.1
Note: as of this article's date, these versions were confirmed to be working:
- torch: 2.6.0.dev20241013+rocm6.2
- torchaudio: 2.5.0.dev20241013+rocm6.2
- torchvision: 0.20.0.dev20241013+rocm6.2
You could also simply pass the direct wheel links: python3 -m pip install <link> <link> <link>
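To confirm what actually got installed, and that the wheels are ROCm (HIP) builds:
# print installed versions and the HIP runtime the torch wheel was built against
import torch, torchvision, torchaudio

print("torch:      ", torch.__version__)
print("torchvision:", torchvision.__version__)
print("torchaudio: ", torchaudio.__version__)
print("HIP runtime:", torch.version.hip)  # None would mean a non-ROCm build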
# check that a CUDA device (the AMD iGPU) is visible
python3 -c "import torch; print(torch.cuda.is_available())"
python3 -c "import torch; print(torch.cuda.get_device_properties(0))"
# simple operations
python3 -c "import torch; x = torch.rand(5, 3); print(x)"
For memory-intensive workloads, these flags help prevent OOM and GPU hang errors:
# export them, or set them inline as below
TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 TORCH_BLAS_PREFER_HIPBLASLT=0 HIP_VISIBLE_DEVICES=0 HIP_MEMORY_POOL_LIMIT=16000000000 PYTORCH_HIP_ALLOC_CONF=garbage_collection_threshold:0.9,max_split_size_mb:512 python3 main.py
Adjust the HIP memory pool limit to the amount of RAM shared with the iGPU (80000000000 was enough for these tests).
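You can also watch the effect of these settings from inside the process; PyTorch's allocator counters work on ROCm as well. A minimal sketch:
import torch

x = torch.rand(4096, 4096, device="cuda")
y = x @ x
torch.cuda.synchronize()
print("allocated:", torch.cuda.memory_allocated() // 2**20, "MiB")
print("reserved: ", torch.cuda.memory_reserved() // 2**20, "MiB")

del x, y
torch.cuda.empty_cache()  # hand cached blocks back to the driver
print("reserved after empty_cache:", torch.cuda.memory_reserved() // 2**20, "MiB")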
Performance boost on CUDA (ROCm) compared to CPU (a minimal timing sketch for the matmul case follows the list):
- Basic matrix multiplications with float32: ~1500x *
- Image classification: 244.80x
- Speech recognition: 5.33x
- BERT: 1.52x
- Tacotron2 TTS: 1.21x
* bfloat16 and float16 run at the same speed as float32 on ROCm, while on the CPU FP16 is ~500x slower than FP32
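The matmul comparison can be reproduced with a rough timing loop like the one below; the matrix size and iteration counts are illustrative, and the torch.cuda.synchronize() calls matter because GPU kernels are queued asynchronously:
import time
import torch

def bench(device, n=4096, iters=10):
    a = torch.rand(n, n, device=device)
    b = torch.rand(n, n, device=device)
    for _ in range(3):  # warm-up
        a @ b
    if device == "cuda":
        torch.cuda.synchronize()
    t0 = time.perf_counter()
    for _ in range(iters):
        a @ b
    if device == "cuda":
        torch.cuda.synchronize()  # wait for queued kernels before stopping the clock
    return (time.perf_counter() - t0) / iters

cpu, gpu = bench("cpu"), bench("cuda")
print(f"cpu {cpu:.4f}s  gpu {gpu:.4f}s  speedup {cpu / gpu:.1f}x")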
# monitor GPU utilization and VRAM usage while a workload runs
watch -n 1 rocm-smi
amd-smi monitor --vram-usage
htop
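To poll memory from inside a script instead of an external tool, torch.cuda.mem_get_info() returns the free/total bytes reported by the HIP runtime; on an APU, what "total" covers depends on the carve-out, so treat the numbers as approximate:
import torch

free, total = torch.cuda.mem_get_info()  # bytes, as reported by the HIP runtime
print(f"free {free / 2**30:.1f} GiB / total {total / 2**30:.1f} GiB")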
If you are running inside a virtual environment and python3 -c "import torch; print(torch.cuda.is_available())" prints 'False', this page may be useful: https://rocm.docs.amd.com/projects/radeon/en/latest/docs/limitations.html#running-pytorch-in-virtual-environments