Skip to content

Instantly share code, notes, and snippets.

@briansp2020
briansp2020 / build_tf.sh
Created February 20, 2025 20:33
Script to build ROCm TF
#!/bin/sh
# Set up the ROCm build environment (gfx1100 target, ROCm 6.3.2 install path):
# export the settings for this run and persist them to ~/.bashrc so that
# future shells pick them up as well.

# Work from $HOME so the relative .bashrc path below is the user's own file;
# abort rather than append to a .bashrc in some unrelated directory.
cd || exit 1

export HSA_OVERRIDE_GFX_VERSION=11.0.0
export PYTORCH_ROCM_ARCH="gfx1100"
export HIP_VISIBLE_DEVICES=0
export ROCM_PATH=/opt/rocm-6.3.2

# Append one line to ./.bashrc (echoing it to stdout, as tee does) unless an
# identical line is already present, so re-running the script does not pile
# up duplicate exports.
# Arguments: $1 - exact line to persist
# Returns:   0 if the line was already present or was appended successfully
persist_export() {
  grep -qxF -- "$1" .bashrc 2>/dev/null && return 0
  printf '%s\n' "$1" | tee -a .bashrc
}

# Persist the same values that were exported above.
persist_export "export HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION}"
persist_export "export PYTORCH_ROCM_ARCH=\"${PYTORCH_ROCM_ARCH}\""
persist_export "export HIP_VISIBLE_DEVICES=${HIP_VISIBLE_DEVICES}"
persist_export "export ROCM_PATH=${ROCM_PATH}"
@briansp2020
briansp2020 / rocm6.3.2-ub22.04-base
Created February 20, 2025 20:31
Dockerfile-rocm6.3.2
# Ubuntu 22.04 base image with ROCm 6.3.2 installed via AMD's amdgpu-install
# helper, plus the git/cmake/Python tooling listed in the second RUN.
FROM ubuntu:22.04

# Fetch and install the amdgpu-install helper package, then run it for the
# "rocm" use case. curl -f makes an HTTP error fail the build instead of
# saving an error page as the .deb; the .deb is removed once installed so it
# does not stay in the layer.
RUN apt-get update && apt-get install -y curl rsync \
 && curl -fsSLO https://repo.radeon.com/amdgpu-install/6.3.2/ubuntu/jammy/amdgpu-install_6.3.60302-1_all.deb \
 && dpkg -i amdgpu-install_6.3.60302-1_all.deb \
 && rm -f amdgpu-install_6.3.60302-1_all.deb \
 && DEBIAN_FRONTEND=noninteractive amdgpu-install --usecase=rocm --no-dkms --no-32 -y

# Refresh the package index in this same layer so a cached earlier layer
# cannot leave the install working from a stale index; run non-interactively
# so no package configuration prompt can stall the build.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    git git-lfs pkg-config patchelf nano wget gpg cmake build-essential libgtest-dev \
    python3-venv python3-dev python3-wheel python3-setuptools python3-requests python3-packaging python-is-python3 \
 && apt-get clean
(pt) root@rocm:~# python tmp/quickstart.py
/root/pt/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/root/pt/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ConvNeXt_Small_Weights.IMAGENET1K_V1`. You can also use `weights=ConvNeXt_Small_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
epoch train_loss valid_loss error_rate time
0 0.110175 0.006588 0.001353 00:34
epoch train_loss valid_loss error_rate time
0 0.015370 0.002482 0.000677 00:47
Training text processing model
@briansp2020
briansp2020 / dmesg
Created February 26, 2024 16:38
dmesg showing MI100 initialization failure
[ 0.000000] Linux version 5.15.0-97-generic (buildd@lcy02-amd64-033) (gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, GNU ld (GNU Binutils for Ubuntu) 2.38) #107-Ubuntu SMP Wed Feb 7 13:26:48 UTC 2024 (Ubuntu 5.15.0-97.107-generic 5.15.136)
[ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-5.15.0-97-generic root=/dev/mapper/ubuntu--vg-ubuntu--lv ro
[ 0.000000] KERNEL supported cpus:
[ 0.000000] Intel GenuineIntel
[ 0.000000] AMD AuthenticAMD
[ 0.000000] Hygon HygonGenuine
[ 0.000000] Centaur CentaurHauls
[ 0.000000] zhaoxin Shanghai
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009ffff] usable
@briansp2020
briansp2020 / build_tf.sh
Created November 19, 2023 04:58
Build TensorFlow r2.14
#!/bin/sh
# Set up the ROCm build environment (gfx1100 target, ROCm under /opt/rocm):
# export the settings for this run and persist them to ~/.bashrc so that
# future shells pick them up as well.

# Work from $HOME so the relative .bashrc path below is the user's own file;
# abort rather than append to a .bashrc in some unrelated directory.
cd || exit 1

export HSA_OVERRIDE_GFX_VERSION=11.0.0
export PYTORCH_ROCM_ARCH="gfx1100"
export HIP_VISIBLE_DEVICES=0
export ROCM_PATH=/opt/rocm

# Append one line to ./.bashrc (echoing it to stdout, as tee does) unless an
# identical line is already present, so re-running the script does not pile
# up duplicate exports.
# Arguments: $1 - exact line to persist
# Returns:   0 if the line was already present or was appended successfully
persist_export() {
  grep -qxF -- "$1" .bashrc 2>/dev/null && return 0
  printf '%s\n' "$1" | tee -a .bashrc
}

# Persist the same values that were exported above.
persist_export "export HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION}"
persist_export "export PYTORCH_ROCM_ARCH=\"${PYTORCH_ROCM_ARCH}\""
persist_export "export HIP_VISIBLE_DEVICES=${HIP_VISIBLE_DEVICES}"
persist_export "export ROCM_PATH=${ROCM_PATH}"
@briansp2020
briansp2020 / Running Triton
Created November 16, 2023 16:25
Running Triton with 7900XTX
bsp2020@rocm:~$ sudo reboot
Connection to 10.0.0.200 closed by remote host.
Connection to 10.0.0.200 closed.
(base) bsp2020@Ryzen5950X:~$ ssh 10.0.0.200
Welcome to Ubuntu 22.04.3 LTS (GNU/Linux 5.15.0-88-generic x86_64)
* Documentation: https://help.ubuntu.com
* Management: https://landscape.canonical.com
* Support: https://ubuntu.com/advantage
@briansp2020
briansp2020 / rocm5.7.1-ub22.04-base
Created November 15, 2023 17:20
ROCm 5.7.1 Docker file
# Ubuntu 22.04 base image with ROCm 5.7.1 installed via AMD's amdgpu-install
# helper, plus the git/cmake/Python tooling listed below.
FROM ubuntu:22.04

# Single layer: install the amdgpu-install helper package, run it for the
# "rocm" use case, then add the build tooling. curl -f makes an HTTP error
# fail the build instead of saving an error page as the .deb; the .deb is
# removed once installed so it does not stay in the image. Both installs run
# non-interactively so no package configuration prompt can stall the build.
RUN apt-get update && apt-get install -y curl \
 && curl -fsSLO https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/jammy/amdgpu-install_5.7.50701-1_all.deb \
 && dpkg -i amdgpu-install_5.7.50701-1_all.deb \
 && rm -f amdgpu-install_5.7.50701-1_all.deb \
 && DEBIAN_FRONTEND=noninteractive amdgpu-install --usecase=rocm --no-dkms --no-32 -y \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    git git-lfs pkg-config patchelf nano wget gpg cmake build-essential \
    python3-venv python3-dev python3-wheel python3-setuptools python3-requests python3-packaging \
 && apt-get clean
@briansp2020
briansp2020 / benchmark_MI100_10222023.txt
Created October 25, 2023 21:25
MI100 new ai-benchmark
root@rocm:/root/tmp# python benchmark.py
2023-10-22 19:31:47.927753: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-22 19:31:47.946907: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-22 19:31:48.774214: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_executor.cc:838] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-22 19:31:48.785167: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_execu
@briansp2020
briansp2020 / benchmark_7900XTX_10142023.txt
Created October 15, 2023 00:12
Latest ai-benchmark using ROCm 5.7.1 and tensorflow-upstream 10/14/2023 source.
(tf) root@rocm:~/tmp# python benchmark.py
2023-10-14 15:02:22.116047: E external/local_xla/xla/stream_executor/plugin_registry.cc:93] Invalid plugin kind specified: DNN
2023-10-14 15:02:22.348480: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-14 15:02:23.756833: I external/local_xla/xla/stream_executor/rocm/rocm_gpu_executor.cc:787] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-14 15:02:23.982269: I external/local_xla/xla/stream_executor/rocm/rocm_gpu_executor.cc:787] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-14 15:02:23.9823
@briansp2020
briansp2020 / quickstart.py
Created October 7, 2023 13:08
fastai example code
from fastai.vision.all import *
from fastai.text.all import *
from fastai.collab import *
from fastai.tabular.all import *
path = untar_data(URLs.PETS)/'images'
def is_cat(x):
    """Return True when the first character of ``x`` is uppercase.

    NOTE(review): presumably ``x`` is an image file name and this serves as
    the labelling function for the data loaders built below - confirm
    against the call site.
    """
    leading_char = x[0]
    return leading_char.isupper()
dls = ImageDataLoaders.from_name_func(
path, get_image_files(path), valid_pct=0.2, seed=42,