Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created October 31, 2025 09:50
Show Gist options
  • Save AmosLewis/bae7dd671e0f82f4a97eb1d510a758b2 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Debug script to see the full FBGEMM-GPU build error for B200.
#
# Launches an NGC PyTorch container, builds FBGEMM-GPU from source for
# sm_100a (B200), and tees the complete build output to fbgemm_debug.log
# so failures can be inspected after the run.
#
# Strict mode: -e aborts on unhandled failures, -u on unset variables.
# pipefail is deliberately omitted so the `docker run … | tee` pipeline
# below still reaches the final "log saved" message when the build fails.
set -eu

# NGC PyTorch image expected to ship a CUDA toolchain with B200 support.
readonly CONTAINER="nvcr.io/nvidia/pytorch:25.10-py3"

echo "=========================================="
echo "Debugging FBGEMM-GPU Build for B200"
echo "=========================================="
echo ""
# Build and smoke-test FBGEMM-GPU inside the container, mirroring all output
# (stdout+stderr) into fbgemm_debug.log on the host.
# Quoting note: the inner script is passed in a double-quoted string, so
# host-side expansions happen immediately while `\$`-escaped variables
# (MAX_JOBS, BUILD_EXIT_CODE, PIPESTATUS) expand in the container shell.
docker run --gpus '"device=0"' --rm \
  --network host \
  --ipc=host \
  -v "$(pwd)":/workspace \
  -w /workspace \
  "$CONTAINER" \
  bash -c "
set -e
echo '1. Installing build dependencies...'
apt-get update -qq
apt-get install -y -qq git cmake ninja-build
pip install -q setuptools_git_versioning scikit-build
echo ''
echo '2. PyTorch and CUDA versions:'
python -c 'import torch; print(\"PyTorch:\", torch.__version__); print(\"CUDA:\", torch.version.cuda)'
echo ''
echo '3. Cloning FBGEMM...'
cd /tmp
rm -rf FBGEMM
git clone -q --recursive https://github.com/pytorch/FBGEMM.git
cd FBGEMM/fbgemm_gpu
echo ''
echo '4. Setting build environment for B200 using sm_100a...'
# Per GitHub issue #4975, FBGEMM supports sm_100a (not plain sm_100);
# the arch is passed explicitly to setup.py below, so the env var must
# not override it.
unset TORCH_CUDA_ARCH_LIST
export MAX_JOBS=8 # Increase parallel build jobs
export USE_CUDA=1
echo \"MAX_JOBS=\$MAX_JOBS\"
echo 'Building for CUDA arch: 10.0a (sm_100a - B200 with FBGEMM support)'
echo ''
echo '5. Building FBGEMM-GPU with sm_100a for B200...'
echo '=========================================='
# Use sm_100a which FBGEMM may have better support for.
python setup.py install \
--build-variant=cuda \
--build-target=default \
-DTORCH_CUDA_ARCH_LIST='10.0a' \
2>&1 | tee /tmp/fbgemm_build_full.log
# PIPESTATUS[0] is the setup.py exit code; the pipeline status is tee's.
BUILD_EXIT_CODE=\${PIPESTATUS[0]}
echo ''
echo '=========================================='
echo 'Build exit code:' \$BUILD_EXIT_CODE
echo ''
if [ \$BUILD_EXIT_CODE -ne 0 ]; then
echo '❌ Build FAILED. Checking CMake error...'
echo ''
echo 'CMake configuration errors:'
grep -A 20 'CMake Error' /tmp/fbgemm_build_full.log || echo 'No CMake Error found in log'
echo ''
echo 'Checking for CUDA architecture issues:'
grep -i 'arch\|sm_\|compute capability' /tmp/fbgemm_build_full.log || echo 'No architecture messages'
echo ''
echo 'Last 50 lines of build log:'
tail -50 /tmp/fbgemm_build_full.log
exit 1
fi
echo ''
echo '6. Testing FBGEMM-GPU import...'
python -c '
import sys
try:
import fbgemm_gpu
print(\"✓ FBGEMM-GPU imported successfully\")
print(\"FBGEMM-GPU location:\", fbgemm_gpu.__file__)
except Exception as e:
print(\"❌ Failed to import fbgemm_gpu:\", e)
sys.exit(1)
'
echo ''
echo '7. Testing basic CUDA ops...'
python -c '
import torch
import fbgemm_gpu
if torch.cuda.is_available():
print(\"✓ CUDA available:\", torch.cuda.get_device_name(0))
print(\"✓ Compute capability:\", torch.cuda.get_device_capability(0))
# Try a simple FBGEMM-GPU operation
print(\"Testing FBGEMM-GPU CUDA ops...\")
x = torch.randn(10, 10).cuda()
print(\"✓ Basic CUDA ops work\")
else:
print(\"❌ CUDA not available\")
'
echo ''
echo '=========================================='
echo '✅ ALL TESTS PASSED!'
echo '=========================================='
" 2>&1 | tee fbgemm_debug.log
# Tell the user where the mirrored build output ended up.
printf '\n'
printf 'Debug log saved to: %s\n' "fbgemm_debug.log"
printf '\n'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment