Skip to content

Instantly share code, notes, and snippets.

@lukevanin
Created April 27, 2025 18:22
Show Gist options
  • Save lukevanin/68ff8081c679d5f01d9c2c072064000c to your computer and use it in GitHub Desktop.
Deploy VAST-AI Unirig to beam.cloud
#
#
# Run Unirig on beam.cloud
#
# Luke Van In (x.com/lukevanin)
#
# How to use this script:
#
# 1. Clone the UniRig repository:
# git clone https://github.com/VAST-AI-Research/UniRig
# cd UniRig
#
# 2. Drop this script (beam_unirig.py) into the root directory of the cloned repository.
# (Alternatively, adjust paths if placed elsewhere).
#
# 3. Download the UniRig model checkpoint files from Hugging Face:
# Visit https://huggingface.co/VAST-AI/UniRig/tree/main
# Download the necessary model files (e.g., skeleton/articulation-xl/model.ckpt).
# The script currently expects these to be downloaded automatically via huggingface_hub,
# but manual download might be needed depending on the setup.
#
# 4. Set up your Beam.cloud environment:
# - Create a persistent volume named 'experiments'. This might be used for caching or storing intermediate results.
# beam volume create experiments --size 10G
# - (Optional, if manually downloading models) Upload the checkpoint files to the 'experiments' volume.
#
# 5. Set up an S3-compatible bucket (e.g., Cloudflare R2, AWS S3, MinIO):
# - This script uses a CloudBucket named 'data' for input and output files.
# - Create a bucket in your chosen provider.
#
# 6. Configure Beam secrets for S3 access:
# - Store your S3 access key and secret key as Beam secrets. Replace 'YOUR_ACCESS_KEY' and 'YOUR_SECRET_KEY'.
# beam secret create S3_KEY --value 'YOUR_ACCESS_KEY'
# beam secret create S3_SECRET --value 'YOUR_SECRET_KEY'
# - Update the `CloudBucketConfig` in the script if your endpoint URL is different from the R2 example.
#
# 7. Deploy the Beam endpoint:
# beam deploy beam_unirig.py
#
# Note on File Handling:
# - This endpoint reads input model files from and writes output files to the configured S3 bucket ('data').
# - It does *not* directly upload/download files via the Beam CLI trigger command.
#
# Next Steps (After Deployment):
# 1. Upload your input 3D model file (e.g., 'my_model.glb') to your configured S3 bucket.
# 2. Trigger the endpoint using the Beam CLI or CURL, specifying the input and desired output paths within the bucket:
# beam trigger generate-skeleton --data '{"input_model_file_name": "my_model.glb", "output_model_file_name": "output/my_model_skeleton.fbx"}'
# (Replace 'generate-skeleton' if you rename the endpoint function).
# 3. Check your S3 bucket for the generated output file (e.g., 'output/my_model_skeleton.fbx').
#
import os
import shutil
import time
import beam
from beam import CloudBucket, CloudBucketConfig, Output, Volume, endpoint
from beam import Image as BeamImage
from PIL import Image
import trimesh
# ─────────────── 1. Build the image ───────────────
# Shell commands that install the OS-level graphics/X11 libraries UniRig's
# rendering stack (bpy / pyrender / open3d) needs inside the container.
_SYSTEM_DEPENDENCY_COMMANDS = [
    "echo '----- Installing system dependencies -----'",
    "apt-get update && apt-get install -y xorg libgl1-mesa-glx libsm6 libxrender1 libxi6 libxrandr2 libxcursor1 libxinerama1 libglu1-mesa-dev freeglut3-dev libglew2.2 libfontconfig1 libfreetype6 wget",
]

# Python wheels: CUDA 12.4 builds of torch 2.5.1 plus UniRig's runtime deps.
_PYTHON_DEPENDENCY_COMMANDS = [
    "echo '----- Installing python dependencies -----'",
    "python3 -m pip install --upgrade pip",
    "python3 -m pip install setuptools wheel",
    (
        "python3 -m pip install "
        "--extra-index-url https://download.pytorch.org/whl/cu124 "
        "--find-links https://data.pyg.org/whl/torch-2.5.1+cu124.html "
        "psutil "
        "torch==2.5.1 "
        "torchvision==0.20.1 "
        "torchaudio==2.5.1 "
        "spconv-cu124 "
        "torch-scatter "
        "torch-cluster "
        "numpy "
        "transformers "
        "python-box "
        "einops "
        "omegaconf "
        "pytorch_lightning "
        "lightning "
        "addict "
        "timm "
        "fast-simplification "
        "bpy==4.2 "
        "trimesh "
        "open3d "
        "pyrender "
        "huggingface_hub "
    ),
]

# flash-attention is installed from a prebuilt wheel (matching cu12 / torch
# 2.5 / cp311 / the cxx11 ABI printed in the diagnostics below) because
# building it from source in the image is slow and fragile.
_FLASH_ATTENTION_COMMANDS = [
    "echo '----- Installing flash-attention -----'",
    (
        "WHEEL_URL=https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && "
        "WHEEL_NAME=/tmp/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && "
        "wget $WHEEL_URL -O $WHEEL_NAME && "
        "ls -l $WHEEL_NAME && "
        "python3 -m pip install --no-deps $WHEEL_NAME && "
        "rm $WHEEL_NAME"
    ),
]

# Container image used by the endpoint: NVIDIA CUDA 12.4.1 base with the
# command groups above baked in, built on an RTX4090 worker.
machine_image = (
    beam.Image(
        base_image="nvcr.io/nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04",
        python_version=beam.PythonVersion.Python311,
    )
    .add_commands(_SYSTEM_DEPENDENCY_COMMANDS)
    .add_commands(_PYTHON_DEPENDENCY_COMMANDS)
    .add_commands(_FLASH_ATTENTION_COMMANDS)
    .build_with_gpu("RTX4090")
)
# S3-compatible bucket used for both input models and generated outputs.
# Credentials come from the Beam secrets S3_KEY / S3_SECRET (see the setup
# notes above). Replace the account-id placeholder with your own Cloudflare
# R2 account id, or point `endpoint` at another S3-compatible provider.
user_data_bucket = CloudBucket(
    name="data",
    mount_path="./user_data",
    config=CloudBucketConfig(
        access_key="S3_KEY",
        secret_key="S3_SECRET",
        # Fixed placeholder typo: "cloudlfare" -> "cloudflare".
        endpoint="https://<<<cloudflare-account-id>>>.r2.cloudflarestorage.com",
    ),
)

# Persistent volume for caching / intermediate artifacts.
# Create it once with: `beam volume create experiments --size 10G`.
experiments_volume = Volume(name="experiments", mount_path="./experiments")
# ─────────────── Skeleton generation ───────────────
@endpoint(
    image=machine_image,
    cpu=4,
    memory="8Gi",
    gpu=["RTX4090"],
    timeout=300,
    volumes=[user_data_bucket, experiments_volume]
)
def generate_skeleton(input_model_file_name: str, output_model_file_name: str) -> str:
    """Run UniRig skeleton generation on a model stored in the S3 bucket.

    Reads `input_model_file_name` from the mounted 'data' bucket, runs
    launch/inference/generate_skeleton.sh on it, and writes the resulting
    FBX back to `output_model_file_name` within the same bucket.

    Args:
        input_model_file_name: Path of the input model, relative to the bucket root.
        output_model_file_name: Path for the output FBX, relative to the bucket root.

    Returns:
        "ok" on success.

    Raises:
        RuntimeError: If the UniRig inference script exits non-zero.
        FileNotFoundError: If the script exits 0 but produced no output file.
    """
    import torch

    user_data_path = "/volumes/user_data"
    input_model_file = os.path.join(user_data_path, input_model_file_name)
    output_model_file = os.path.join(user_data_path, output_model_file_name)
    print(f"Input model file: {input_model_file}")
    print(f"Output model file: {output_model_file}")

    # Environment diagnostics — useful when debugging CUDA / wheel mismatches.
    print(f"Current directory: {os.getcwd()}")
    print(f"Contents of current directory: {os.listdir()}")
    print("torch.__version__:", torch.__version__)  # Should be 2.5.1 or compatible
    print("torch.version.cuda:", torch.version.cuda)  # Should be 12.4
    print("torch.cuda.get_arch_list():", torch.cuda.get_arch_list())  # Should include sm_80
    print("torch._C._GLIBCXX_USE_CXX11_ABI:", torch._C._GLIBCXX_USE_CXX11_ABI)
    print("torch.cuda.is_available():", torch.cuda.is_available())
    print("CUDA devices:", torch.cuda.device_count(), torch.cuda.get_device_name(0) if torch.cuda.is_available() else "")

    # Stage the input where the inference script expects it.
    # NOTE(review): the file is always staged with a .glb extension regardless
    # of the input's actual format — confirm non-glb inputs are accepted.
    shutil.copy(input_model_file, "/tmp/input.glb")

    # Make the inference scripts executable. OR the exec bits into the
    # existing mode: the original `chmod 0o111` set the mode to --x--x--x,
    # stripping read permission so the shell could not load the scripts.
    inference_dir = "launch/inference"
    for file_name in os.listdir(inference_dir):
        script_path = os.path.join(inference_dir, file_name)
        os.chmod(script_path, os.stat(script_path).st_mode | 0o111)

    cmd = (
        f"pwd && ls -la && "
        f"launch/inference/generate_skeleton.sh "
        f"--input /tmp/input.glb "
        f"--output /tmp/output.fbx"
    )
    print("running command:", cmd)
    status = os.system(cmd)
    # os.system returns a raw wait status, not the child's exit code; decode
    # it so the error message reports the real code (negative = killed by signal).
    exit_code = os.waitstatus_to_exitcode(status) if os.name == "posix" else status
    if exit_code != 0:
        raise RuntimeError(f"Unirig skeleton generation failed (exit {exit_code})")

    if not os.path.exists("/tmp/output.fbx"):
        raise FileNotFoundError("Expected /tmp/output.fbx but it was never created")
    # Output paths like "output/model.fbx" need their directory created
    # inside the bucket mount before the copy.
    output_dir = os.path.dirname(output_model_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    print(f"Copying /tmp/output.fbx to {output_model_file}")
    shutil.copyfile("/tmp/output.fbx", output_model_file)
    return "ok"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment