We’ll be running all the following steps inside a ubuntu:latest
docker container. This will help guarantee reproducibility. I’ll be using podman
locally, but you can use docker
with exactly the same steps. Do not forget to docker pull ubuntu:latest
to have the latest version of that image locally.
Get the sources for PyTorch:
# Clone PyTorch together with all of its submodules (--recursive).
# NOTE: this is the SSH remote, so it requires a GitHub SSH key on the host;
# the https remote (https://github.com/pytorch/pytorch.git) works without one.
git clone --recursive [email protected]:pytorch/pytorch.git
As some riscv64 binaries will be run in the build and test process, you need to install QEMU on your host machine:
# Run on the HOST (not inside the container): qemu-user-static registers
# binfmt handlers so riscv64 binaries built during the process can execute.
apt update && apt install -y --no-install-recommends qemu-user-static
Launch the container, mounting in the freshly cloned repository of PyTorch:
# Run on the HOST. Mounts the freshly cloned PyTorch checkout at
# /scratch/pytorch and a few personal dotfiles for convenience.
# "$(pwd)" is quoted so a checkout path containing spaces does not break -v.
podman run --rm -it --privileged --pids-limit=-1 --network=host \
  -v ~/.gdbinit:/root/.gdbinit \
  -v ~/.ssh:/root/.ssh \
  -v ~/.bash_history:/root/.bash_history \
  -v ~/.gitconfig:/root/.gitconfig \
  -v "$(pwd)":/scratch/pytorch \
  -w /scratch/pytorch \
  --name pytorch-container ubuntu:latest bash
All following commands will be run inside the container, unless specified otherwise.
Install some necessary packages:
# Native and riscv64 cross toolchains (GCC 14), CMake/Ninja, debootstrap for
# the sysroot, curl/gnupg/lsb-release for the LLVM install script, plus
# Python venv support, git and ccache.
apt update && apt install -y --no-install-recommends build-essential gcc-14 gcc-14-riscv64-linux-gnu g++-14 g++-14-riscv64-linux-gnu cmake debootstrap ca-certificates lsb-release curl gnupg software-properties-common ninja-build python3-venv git ccache
Install the latest version of LLVM:
# Download and run the official apt.llvm.org install script for LLVM/Clang 19.
# Use curl: the minimal ubuntu:latest image has no wget, and the package list
# installed above includes curl (plus the lsb-release/gnupg/
# software-properties-common packages the script relies on).
curl -fsSLo llvm.sh https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
./llvm.sh 19
rm llvm.sh
Create a sysroot to contain the riscv64 dependencies necessary to build and run pytorch:
# Bootstrap an Ubuntu "noble" riscv64 root filesystem into ./sysroot, with the
# compilers and the libraries PyTorch will link against (OpenBLAS, libuv,
# pybind11, Python 3 headers, googletest, benchmark).
debootstrap --arch=riscv64 --components=main,universe --include=build-essential,gcc-14,g++-14,googletest,libbenchmark-dev,libuv1-dev,pybind11-dev,python-is-python3,python3,python3-dev,python3-venv,libopenblas-dev noble sysroot
Save path of sysroot for future use:
# Absolute path to the riscv64 sysroot created by debootstrap above; used by
# the toolchain file and the cross-compilation flags below.
export SYSROOT=$(pwd)/sysroot
Before building PyTorch, we’ll need to build SLEEF ourselves, as a recent-enough version isn’t yet available in the Ubuntu package repositories:
# Build SLEEF twice: first natively (the cross build reuses artifacts from the
# native build via NATIVE_BUILD_DIR), then for riscv64 against the sysroot.
cd third_party/sleef
# Build native
cmake -GNinja -B build-native \
  -DBUILD_SHARED_LIBS=OFF \
  -DBUILD_DFT=OFF \
  -DBUILD_GNUABI_LIBS=OFF \
  -DBUILD_TESTS=OFF \
  -DCMAKE_C_COMPILER="$(which clang-19)" \
  -DCMAKE_TOOLCHAIN_FILE="$(pwd)/toolchains/native-llvm.cmake"
ninja -C build-native install
# Build riscv64 (RVVM1/RVVM2 enforce the RISC-V vector-extension variants)
cmake -GNinja -B build-riscv64 \
  -DBUILD_SHARED_LIBS=OFF \
  -DBUILD_DFT=OFF \
  -DBUILD_GNUABI_LIBS=OFF \
  -DBUILD_TESTS=OFF \
  -DSLEEF_ENFORCE_RVVM1=ON \
  -DSLEEF_ENFORCE_RVVM2=ON \
  -DCMAKE_SYSROOT="${SYSROOT}" \
  -DCMAKE_INSTALL_PREFIX="${SYSROOT}" \
  -DCMAKE_C_COMPILER="$(which clang-19)" \
  -DCMAKE_TOOLCHAIN_FILE="$(pwd)/toolchains/riscv64-llvm.cmake" \
  -DNATIVE_BUILD_DIR="$(pwd)/build-native"
ninja -C build-riscv64 install
cd -
Setup some environment variables which will be picked up by the PyTorch build system:
# Force PyTorch to use the SLEEF we just installed into the sysroot
export USE_SYSTEM_SLEEF=ON
# Disable OpenMP, it fails to find the library
# export USE_OPENMP=OFF
# Disable CUDA
export USE_CUDA=0
export ATEN_THREADING=NATIVE
# Force using OpenBLAS as the BLAS provider
export BLAS=OpenBLAS
# export USE_MKLDNN=ON # do not activate oneDNN just yet, that's future work
# Enable cross-compilation for riscv64
export CC=$(which clang-19)
export CFLAGS="--target=riscv64-linux-gnu --sysroot=${SYSROOT} -I${SYSROOT}/usr/include/python3.12"
export CMAKE_TOOLCHAIN_FILE=$(pwd)/toolchains/riscv64-llvm.cmake
# Make the host Python report the riscv64 target's sysconfig data
cp ${SYSROOT}/usr/lib/python3.12/_sysconfigdata__linux_riscv64-linux-gnu.py /usr/lib/python3.12/
export _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata__linux_riscv64-linux-gnu
# Setup QEMU for the few RISC-V executables that will run during the build
export QEMU_LD_PREFIX="${SYSROOT}"
Back at the root of the PyTorch checkout, add the toolchains/riscv64-llvm.cmake
file:
# Write the CMake toolchain file for the riscv64 cross build. The heredoc
# delimiter is quoted ('EOF') so $ENV{SYSROOT} etc. reach the file literally.
mkdir -p toolchains
cat > toolchains/riscv64-llvm.cmake << 'EOF'
set(CMAKE_CROSSCOMPILING True)
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_PROCESSOR "riscv64")
set(CMAKE_SYSROOT $ENV{SYSROOT})
# When Clang is cross-compiling, Clang detects GCC toolchains in
# `${CMAKE_SYSROOT}` and prioritizes
# `${CMAKE_SYSROOT}/lib/gcc/riscv64-linux-gnu/14` over
# `${CMAKE_SYSROOT}/usr/lib/gcc/riscv64-linux-gnu/14`. The former is a symlink
# to the latter, as `${CMAKE_SYSROOT}/lib` resolves to `${CMAKE_SYSROOT}/usr/lib`.
#
# CMake parses the GCC toolchain used by Clang to find C++ headers and treat
# them as an implicit dependency for any C++ targets. The symlink of the former
# GCC toolchain is incorrectly resolved when finding these headers in CMake's
# `CMakeParseImplicitIncludeInfo.cmake` which results in Ninja thinking these
# headers and any C++ target depending upon them are dirty. This can cause a
# single CUDA file to need to rebuild 150+ targets when nothing has changed
# besides that one file. To workaround this CMake bug, the GCC toolchain is
# explicitly selected to be a path which does not contain a symlink.
set(GCC_INSTALL_DIR "${CMAKE_SYSROOT}/usr/lib/gcc/riscv64-linux-gnu/14")
set(CLANG_GCC_INSTALL_FLAG "--gcc-install-dir=${GCC_INSTALL_DIR}")
set(CMAKE_FIND_ROOT_PATH
  ${CMAKE_SYSROOT}/usr/lib
  ${CMAKE_SYSROOT}/usr/lib/riscv64-linux-gnu
  ${CMAKE_SYSROOT}/usr/include/riscv64-linux-gnu
  ${CMAKE_SYSROOT}/usr/local/lib/python3.12/dist-packages
)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
find_program(CMAKE_C_COMPILER NAMES clang-19)
set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu)
set(CMAKE_C_FLAGS "-w ${CLANG_GCC_INSTALL_FLAG}")
find_program(CMAKE_CXX_COMPILER NAMES clang++-19)
set(CMAKE_CXX_COMPILER_TARGET riscv64-linux-gnu)
set(CMAKE_CXX_FLAGS "-w ${CLANG_GCC_INSTALL_FLAG}")
# cmake can't find OpenBLAS otherwise
set(ENV{OpenBLAS_HOME} ${CMAKE_SYSROOT}/usr/include/riscv64-linux-gnu/openblas-pthread)
set(ENV{OpenBLAS} ${CMAKE_SYSROOT}/usr/lib/riscv64-linux-gnu/openblas-pthread)
# cmake can't find Python otherwise
set(Python_INCLUDE_DIR ${CMAKE_SYSROOT}/usr/include/python3.12)
# Unset temporary variables
unset(CLANG_GCC_INSTALL_FLAG)
unset(GCC_INSTALL_DIR)
EOF
Setup the build dependencies for PyTorch:
# Create and activate a host-side Python virtual env, then install PyTorch's
# build-time requirements into it.
python3 -m venv .venv
source .venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install -r requirements.txt
Sourcing .venv/bin/activate above activated the local Python virtual env. You’ll need to make sure this virtual env is activated for all following steps.
Build the PyTorch wheel:
# Cross-build the PyTorch wheel; --plat-name tags it as linux_riscv64 so pip
# on the target accepts it. The wheel lands in wheels/.
python3 setup.py bdist_wheel --plat-name=linux_riscv64 --dist-dir wheels/
In a separate terminal (without killing the first one), attach to the running container:
# Run on the HOST: open a second shell inside the already-running container.
podman exec -it pytorch-container bash
Setup some of the environment variables in this second terminal:
# in the 2nd terminal
# `podman exec` starts a fresh shell, so the environment from the first
# terminal must be re-created here.
export SYSROOT=$(pwd)/sysroot
# Setup QEMU for the few RISC-V executables that will run during the build
export QEMU_LD_PREFIX="${SYSROOT}"
# Point pip to the RISE PyPI Registry, it contains RISC-V builds of various projects
export PIP_INDEX_URL=https://gitlab.com/api/v4/projects/riseproject%2Fpython%2Fwheel_builder/packages/pypi/simple
Setup another python virtual env, this one targeting riscv64:
# in the 2nd terminal
# A second virtual env, built from the sysroot's riscv64 Python (executed
# through the QEMU binfmt handler installed earlier).
sysroot/usr/bin/python3 -m venv .venv-riscv64
source .venv-riscv64/bin/activate
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
Install the PyTorch wheel you just built:
# in the 2nd terminal
# Install the cross-built wheel produced by the bdist_wheel step above.
pip install wheels/torch-*-linux_riscv64.whl
And FINALLY, validate it all works with a simple matmul:
# in the 2nd terminal
# Change directory first, or "import torch" would prioritize the local source
# directory /scratch/pytorch/torch over the installed wheel.
cd /tmp
# Make the OpenBLAS shared library visible to the dynamic loader
export LD_LIBRARY_PATH=/scratch/pytorch/sysroot/usr/lib/riscv64-linux-gnu/openblas-pthread/
python -c "import torch; a = torch.tensor([[1.0, 2.0], [4.0, 5.0]]).float(); b = torch.tensor([[7.0, 8.0], [9.0, 10.0]]).float(); print(torch.matmul(a, b))"
It should output:
tensor([[25., 28.],
[73., 82.]])