Created
February 8, 2018 23:56
-
-
Save PatWie/68abc24de854ba87ff07c68dea61e778 to your computer and use it in GitHub Desktop.
NVML for Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| # Author: Patrick Wieschollek <mail@patwie.com> | |
| """ | |
| A Python2 + Python3 wrapper for NVML | |
| """ | |
| from ctypes import * | |
| import threading | |
# Public API: only the context class is exported by a wildcard import.
__all__ = ["NvidiaContext"]
# ctypes alias for NVML return codes (not referenced by the rest of the visible code).
_nvmlReturn_t = c_uint
# Sizes of the string buffers handed to the UUID / name queries.
NVML_DEVICE_UUID_BUFFER_SIZE = 80
NVML_DEVICE_NAME_BUFFER_SIZE = 64
# Handle of the loaded shared library; assigned by NVML.load().
nvmlLib = None
# Guards access to nvmlLib and updates of nvml_init_count.
_lib_lock = threading.Lock()
# Incremented by create_context() and decremented by destroy_context().
nvml_init_count = 0
# Symbolic names of the NVML return codes, keyed by the decimal code as a string.
NvmlErrorCodes = {
    "0": "NVML_SUCCESS",
    "1": "NVML_ERROR_UNINITIALIZED",
    "2": "NVML_ERROR_INVALID_ARGUMENT",
    "3": "NVML_ERROR_NOT_SUPPORTED",
    "4": "NVML_ERROR_NO_PERMISSION",
    "5": "NVML_ERROR_ALREADY_INITIALIZED",
    "6": "NVML_ERROR_NOT_FOUND",
    "7": "NVML_ERROR_INSUFFICIENT_SIZE",
    "8": "NVML_ERROR_INSUFFICIENT_POWER",
    "9": "NVML_ERROR_DRIVER_NOT_LOADED",
    "10": "NVML_ERROR_TIMEOUT",
    "11": "NVML_ERROR_IRQ_ISSUE",
    "12": "NVML_ERROR_LIBRARY_NOT_FOUND",
    "13": "NVML_ERROR_FUNCTION_NOT_FOUND",
    "14": "NVML_ERROR_CORRUPTED_INFOROM",
    "15": "NVML_ERROR_GPU_IS_LOST",
    "16": "NVML_ERROR_RESET_REQUIRED",
    "17": "NVML_ERROR_OPERATING_SYSTEM",
    "18": "NVML_ERROR_LIB_RM_VERSION_MISMATCH",
    "999": "NVML_ERROR_UNKNOWN",
}


def NvmlCodeToString(i):
    """Translate an NVML return code into its symbolic name.

    Args:
        i: return code; anything whose ``str()`` is the decimal code
            (an int or a numeric string).

    Returns:
        The matching name from ``NvmlErrorCodes``. Codes missing from the
        table map to ``"NVML_ERROR_UNKNOWN"`` instead of raising ``KeyError``,
        so formatting an ``NvmlException`` can never mask the original error.
    """
    return NvmlErrorCodes.get(str(i), "NVML_ERROR_UNKNOWN")
class NvmlException(Exception):
    """Exception carrying an NVML return code.

    Attributes:
        error_code: the code exactly as passed to the constructor.
    """

    def __init__(self, error_code):
        # Keep the raw code available for programmatic inspection.
        self.error_code = error_code
        super(NvmlException, self).__init__(error_code)

    def __str__(self):
        """Return the symbolic name looked up for ``error_code``."""
        code = self.error_code
        return NvmlCodeToString(code)
def CheckNvmlReturn(ret):
    """Pass a zero return code through; raise for anything else.

    Args:
        ret: return code of an NVML call.

    Returns:
        ``ret`` unchanged when it equals 0.

    Raises:
        NvmlException: constructed from ``ret`` when it is non-zero.
    """
    if ret == 0:
        return ret
    raise NvmlException(ret)
class NVML(object):
    """Loader and symbol cache for the NVML shared library.

    ``load()`` opens ``libnvidia-ml.so.1`` and stores the handle in the
    module-level ``nvmlLib``; the functions listed in ``build_cache()`` are
    then resolved once and kept in ``self.func_ptr``.
    """

    # NVML return codes reported when the library or a symbol is missing
    # (same values as the "12" / "13" entries of NvmlErrorCodes).
    _ERROR_LIBRARY_NOT_FOUND = 12
    _ERROR_FUNCTION_NOT_FOUND = 13

    def __init__(self):
        super(NVML, self).__init__()
        # name -> foreign function; stays empty until load() has succeeded.
        self.func_ptr = {}

    def load(self):
        """Open the shared library and resolve the cached functions.

        Raises:
            NvmlException: with code 12 when the library cannot be opened,
                or with code 13 when a required symbol is missing.
        """
        global nvmlLib
        with _lib_lock:
            try:
                nvmlLib = CDLL("libnvidia-ml.so.1")
            except OSError:
                raise NvmlException(self._ERROR_LIBRARY_NOT_FOUND)
        # Must run outside the lock: function_pointer() acquires the same
        # non-reentrant lock for every lookup.
        self.build_cache()

    def function_pointer(self, name):
        """Look up ``name`` on the loaded library.

        Args:
            name: exported symbol name.

        Returns:
            The ctypes foreign function object.

        Raises:
            NvmlException: with code 13 when the attribute lookup fails
                (this includes the case where the library was never loaded).
        """
        with _lib_lock:
            try:
                return getattr(nvmlLib, name)
            except AttributeError:
                raise NvmlException(self._ERROR_FUNCTION_NOT_FOUND)

    def build_cache(self):
        """Resolve every NVML function this module uses into ``self.func_ptr``."""
        function_pointers = [
            "nvmlDeviceGetName",
            "nvmlDeviceGetUUID",
            "nvmlDeviceGetMemoryInfo",
            "nvmlDeviceGetUtilizationRates",
            "nvmlInit_v2",
            "nvmlShutdown",
            "nvmlDeviceGetCount_v2",
            "nvmlDeviceGetHandleByIndex_v2",
        ]
        self.func_ptr = {n: self.function_pointer(n) for n in function_pointers}

    def get_function(self, name):
        """Return the cached function called ``name``.

        Args:
            name: one of the names resolved by ``build_cache()``.

        Raises:
            NvmlException: with code 13 when ``name`` is not cached, instead
                of returning ``None`` and failing later at the call site.
        """
        try:
            return self.func_ptr[name]
        except KeyError:
            raise NvmlException(self._ERROR_FUNCTION_NOT_FOUND)

    def call(self, hnd, buf_len, func_ptr):
        """Invoke a cached function that fills a string buffer.

        Args:
            hnd: first argument passed to the function (a device handle).
            buf_len: size of the string buffer to allocate and to report
                to the function.
            func_ptr: *name* of the cached function to invoke.

        Returns:
            The ctypes string buffer after the call; read ``.value`` for
            its content.

        Raises:
            NvmlException: when the function is not cached or returns a
                non-zero code.
        """
        c_name = create_string_buffer(buf_len)
        fn = self.get_function(func_ptr)
        ret = fn(hnd, c_name, c_uint(buf_len))
        CheckNvmlReturn(ret)
        return c_name


# Module-wide loader instance shared by the device and context classes.
_NVML = NVML()
class GpuDevice(Structure):
    """Field-less ctypes structure used as the target type of device handles."""


# Pointer type of the handle written by the handle-by-index query.
c_nvmlDevice_t = POINTER(GpuDevice)
class NvidiaDevice(object):
    """Represent a single GPU device, addressed through its NVML handle."""

    # The ctypes layouts are defined once here instead of being rebuilt on
    # every Memory() / Utilization() call.
    class _GpuMemoryInfo(Structure):
        _fields_ = [
            ('total', c_ulonglong),
            ('free', c_ulonglong),
            ('used', c_ulonglong),
        ]

    class _GpuUtilizationInfo(Structure):
        _fields_ = [
            ('gpu', c_uint),
            ('memory', c_uint),
        ]

    def __init__(self, hnd):
        """Wrap a device handle.

        Args:
            hnd: handle passed as first argument to every per-device query.
        """
        super(NvidiaDevice, self).__init__()
        self.hnd = hnd

    def Name(self):
        """Return GPU name

        Example:
            >>> print(nvidia.Device(0).Name())
            GeForce GTX 970

        Returns:
            Name of GPU brand, as the ``.value`` of a ctypes string buffer
            (a byte string; ``bytes`` on Python 3).

        Raises:
            NvmlException: when the query returns a non-zero code.
        """
        return _NVML.call(self.hnd, NVML_DEVICE_NAME_BUFFER_SIZE, "nvmlDeviceGetName").value

    def UUID(self):
        """Return GPU UUID

        Example:
            >>> print(nvidia.Device(0).UUID())
            GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx

        Returns:
            UUID of GPU, as the ``.value`` of a ctypes string buffer
            (a byte string; ``bytes`` on Python 3).

        Raises:
            NvmlException: when the query returns a non-zero code.
        """
        return _NVML.call(self.hnd, NVML_DEVICE_UUID_BUFFER_SIZE, "nvmlDeviceGetUUID").value

    def Memory(self):
        """Memory information in bytes

        Example:
            >>> print(nvidia.Device(0).Memory())
            {'total': 4238016512L, 'used': 434831360L, 'free': 3803185152L}

        Returns:
            dict with the keys ``'total'``, ``'free'`` and ``'used'``
            (memory in bytes).

        Raises:
            NvmlException: when the query returns a non-zero code.
        """
        c_memory = self._GpuMemoryInfo()
        query = _NVML.get_function("nvmlDeviceGetMemoryInfo")
        CheckNvmlReturn(query(self.hnd, byref(c_memory)))
        return {'total': c_memory.total, 'free': c_memory.free, 'used': c_memory.used}

    def Utilization(self):
        """Percent of time over the past second the device was utilized.

        Details:
            gpu: percent of time over the past second during which one or
                more kernels was executing on the GPU.
            memory: percent of time over the past second during which
                global (device) memory was being read or written.

        Example:
            >>> print(nvidia.Device(0).Utilization())
            {'gpu': 4L, 'memory': 6L}

        Returns:
            dict with the keys ``'gpu'`` and ``'memory'``.

        Raises:
            NvmlException: when the query returns a non-zero code.
        """
        c_util = self._GpuUtilizationInfo()
        query = _NVML.get_function("nvmlDeviceGetUtilizationRates")
        CheckNvmlReturn(query(self.hnd, byref(c_util)))
        return {'gpu': c_util.gpu, 'memory': c_util.memory}
class NvidiaContext(object):
    """Creates a context to query information

    Call ``create_context()`` before any query and ``destroy_context()``
    afterwards, or use the object in a ``with`` statement, which performs
    both calls and shuts NVML down even when a query raises.
    """

    def __init__(self):
        super(NvidiaContext, self).__init__()

    def __enter__(self):
        """Create the context and return ``self`` for use in ``with``."""
        self.create_context()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Destroy the context; exceptions from the body are not suppressed."""
        self.destroy_context()
        return False

    def create_context(self):
        """Create a new context

        Loads the library, initialises NVML and increments the module-level
        init counter.

        Raises:
            NvmlException: when loading or initialisation fails.
        """
        global nvml_init_count
        _NVML.load()
        CheckNvmlReturn(_NVML.get_function("nvmlInit_v2")())
        # Update the refcount under the lock; ``with`` releases it on any exit.
        with _lib_lock:
            nvml_init_count += 1

    def destroy_context(self):
        """Destroy current context

        Shuts NVML down and decrements the module-level init counter,
        never letting it drop below zero.

        Raises:
            NvmlException: when shutdown returns a non-zero code.
        """
        global nvml_init_count
        CheckNvmlReturn(_NVML.get_function("nvmlShutdown")())
        with _lib_lock:
            if nvml_init_count > 0:
                nvml_init_count -= 1

    def NumCudaDevices(self):
        """Get number of CUDA devices.

        Example:
            >>> num_gpus = nvidia.NumCudaDevices()
            1

        Returns:
            count CUDA devices

        Raises:
            NvmlException: when the query returns a non-zero code.
        """
        c_count = c_uint()
        CheckNvmlReturn(_NVML.get_function("nvmlDeviceGetCount_v2")(byref(c_count)))
        return c_count.value

    def Devices(self):
        """Generator of devices in context

        Yields:
            NvidiaDevice: single CUDA device
        """
        for i in range(self.NumCudaDevices()):
            yield self.Device(i)

    def Device(self, idx):
        """Get specific CUDA device

        Args:
            idx: index of device

        Returns:
            NvidiaDevice: single CUDA device

        Raises:
            NvmlException: when the handle lookup returns a non-zero code.
        """
        c_index = c_uint(idx)
        device = c_nvmlDevice_t()
        CheckNvmlReturn(_NVML.get_function("nvmlDeviceGetHandleByIndex_v2")(c_index, byref(device)))
        return NvidiaDevice(device)
if __name__ == '__main__':
    # Demo: print name, UUID, memory and utilization of every device.
    nvidia = NvidiaContext()
    nvidia.create_context()
    try:
        for device in nvidia.Devices():
            print(device.Name())
            print(device.UUID())
            print(device.Memory())
            print(device.Utilization())
    finally:
        # Shut NVML down even if one of the queries above raised.
        nvidia.destroy_context()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment