Last active
March 8, 2026 07:55
-
-
Save devnexen/91bc6818b999fcf963b420055c055e9e to your computer and use it in GitHub Desktop.
Benchmark: NVML API vs sysfs read for GPU NUMA node ID retrieval
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #![cfg(feature = "gpu-topology")] | |
| use nvml_wrapper::bitmasks::InitFlags; | |
| use nvml_wrapper::Nvml; | |
| use std::path::Path; | |
| use std::time::Instant; | |
/// Reads the file at `path` and parses its whitespace-trimmed contents as `T`.
///
/// Returns `None` when the file cannot be read as a UTF-8 string or when the
/// trimmed contents do not parse as `T`; the underlying error is discarded.
fn read_from_file<T: std::str::FromStr>(path: &Path) -> Option<T> {
    let contents = std::fs::read_to_string(path).ok()?;
    let trimmed = contents.trim();
    trimmed.parse::<T>().ok()
}
| fn main() { | |
| let nvml = Nvml::init_with_flags(InitFlags::NO_GPUS).expect("Failed to init NVML"); | |
| let count = nvml.device_count().expect("Failed to get device count"); | |
| println!("Found {} NVIDIA GPU(s)\n", count); | |
| const ITERATIONS: u32 = 100_000; | |
| for i in 0..count { | |
| let gpu = nvml.device_by_index(i).expect("Failed to get device"); | |
| let pci_info = gpu.pci_info().expect("Failed to get PCI info"); | |
| println!("GPU {}: bus_id={}", i, pci_info.bus_id); | |
| // Benchmark: NVML API call (new path) | |
| let start = Instant::now(); | |
| for _ in 0..ITERATIONS { | |
| let _ = std::hint::black_box(gpu.numa_node_id()); | |
| } | |
| let api_elapsed = start.elapsed(); | |
| // Benchmark: sysfs read (old path) | |
| let bus_id = pci_info.bus_id.to_lowercase(); | |
| let fixed_bus_id = bus_id.strip_prefix("0000").unwrap_or(&bus_id); | |
| let numa_path = format!("/sys/bus/pci/devices/{}/numa_node", fixed_bus_id); | |
| let start = Instant::now(); | |
| for _ in 0..ITERATIONS { | |
| let _ = std::hint::black_box(read_from_file::<usize>(Path::new(&numa_path))); | |
| } | |
| let sysfs_elapsed = start.elapsed(); | |
| println!(" NVML API (numa_node_id): {:?} ({} iters)", api_elapsed, ITERATIONS); | |
| println!(" sysfs read: {:?} ({} iters)", sysfs_elapsed, ITERATIONS); | |
| println!( | |
| " Speedup: {:.1}x faster", | |
| sysfs_elapsed.as_nanos() as f64 / api_elapsed.as_nanos() as f64 | |
| ); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment