Created
August 11, 2024 14:12
-
-
Save MMMarcy/8f8bc3f660e01eb46cf28c654d373a62 to your computer and use it in GitHub Desktop.
Configure the NVIDIA device plugin in Kubernetes using Pulumi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Installs and configures the nvidia device plugin.""" | |
from textwrap import dedent | |
import pulumi | |
from pulumi_kubernetes.apps.v1 import ( | |
DaemonSet, | |
DaemonSetSpecArgs, | |
DaemonSetUpdateStrategyArgs, | |
) | |
from pulumi_kubernetes.core.v1 import ( | |
CapabilitiesArgs, | |
ConfigMap, | |
ConfigMapVolumeSourceArgs, | |
ContainerArgs, | |
EnvVarArgs, | |
HostPathVolumeSourceArgs, | |
KeyToPathArgs, | |
PodSpecArgs, | |
PodTemplateSpecArgs, | |
SecurityContextArgs, | |
TolerationArgs, | |
VolumeArgs, | |
VolumeMountArgs, | |
) | |
from pulumi_kubernetes.meta.v1 import LabelSelectorArgs, ObjectMetaArgs | |
def provide_nvidia_demonset() -> DaemonSet:
    """Declare the NVIDIA device plugin with GPU time-slicing configured.

    Two resources are declared in the ``kube-system`` namespace, in this order:

    1. A ConfigMap whose ``config.json`` key holds the plugin configuration.
       Its ``sharing.timeSlicing`` section lists ``nvidia.com/gpu`` with
       2 replicas.
    2. A DaemonSet running the ``k8s-device-plugin`` image, with that
       ConfigMap key mounted at ``/config`` and ``CONFIG_FILE`` pointing at
       the same path.

    The names of both resources are also exported as the
    ``nvidia-device-plugin`` stack output.

    Returns:
        The DaemonSet resource that runs the device plugin.
    """
    namespace = "kube-system"
    config_key = "config.json"
    config_mount_path = "/config"
    plugin_socket_dir = "/var/lib/kubelet/device-plugins"
    plugin_volume = "device-plugin"
    config_volume = "configuration"
    pod_label = {"name": "nvidia-device-plugin-ds"}

    # Plugin configuration, stored verbatim as JSON text in the ConfigMap.
    plugin_config = dedent(
        """
        {
          "version": "v1",
          "flags": {
            "migStrategy": "mixed",
            "failOnInitError": false,
            "mpsRoot": "",
            "nvidiaDriverRoot": "/",
            "nvidiaDevRoot": "/",
            "gdsEnabled": false,
            "mofedEnabled": false,
            "useNodeFeatureAPI": null,
            "deviceDiscoveryStrategy": "auto",
            "plugin": {
              "passDeviceSpecs": false,
              "deviceListStrategy": [
                "envvar"
              ],
              "deviceIDStrategy": "uuid",
              "cdiAnnotationPrefix": "cdi.k8s.io/",
              "nvidiaCTKPath": "/usr/bin/nvidia-ctk",
              "containerDriverRoot": "/driver-root"
            }
          },
          "resources": {
            "gpus": [
              {
                "pattern": "*",
                "name": "nvidia.com/gpu"
              }
            ]
          },
          "sharing": {
            "timeSlicing": {
              "resources": [
                {"name": "nvidia.com/gpu", "replicas": 2}
              ]
            }
          }
        }
        """
    )

    config_map = ConfigMap(
        "nvidia-device-plugin-config-map",
        metadata=ObjectMetaArgs(
            name="nvidia-device-plugin-config-map",
            namespace=namespace,
        ),
        data={config_key: plugin_config},
    )

    plugin_container = ContainerArgs(
        name="nvidia-device-plugin-ctr",
        image="nvcr.io/nvidia/k8s-device-plugin:v0.16.2",
        env=[
            EnvVarArgs(name="FAIL_ON_INIT_ERROR", value="false"),
            # Same path as the configuration volume mount below.
            EnvVarArgs(name="CONFIG_FILE", value=config_mount_path),
        ],
        security_context=SecurityContextArgs(
            allow_privilege_escalation=False,
            capabilities=CapabilitiesArgs(drop=["all"]),
        ),
        volume_mounts=[
            VolumeMountArgs(name=plugin_volume, mount_path=plugin_socket_dir),
            VolumeMountArgs(
                name=config_volume,
                mount_path=config_mount_path,
                sub_path=config_key,
            ),
        ],
    )

    pod_volumes = [
        VolumeArgs(
            name=plugin_volume,
            host_path=HostPathVolumeSourceArgs(path=plugin_socket_dir),
        ),
        VolumeArgs(
            name=config_volume,
            config_map=ConfigMapVolumeSourceArgs(
                # Taken from the ConfigMap resource's own metadata output.
                name=config_map.metadata["name"],
                items=[KeyToPathArgs(key=config_key, path=config_key)],
            ),
        ),
    ]

    pod_spec = PodSpecArgs(
        # Tolerate the NoSchedule taint keyed on nvidia.com/gpu.
        tolerations=[
            TolerationArgs(
                key="nvidia.com/gpu",
                operator="Exists",
                effect="NoSchedule",
            )
        ],
        priority_class_name="system-node-critical",
        containers=[plugin_container],
        volumes=pod_volumes,
    )

    daemon_set = DaemonSet(
        "nvidia-device-plugin",
        metadata=ObjectMetaArgs(
            name="nvidia-device-plugin-daemonset",
            namespace=namespace,
        ),
        spec=DaemonSetSpecArgs(
            selector=LabelSelectorArgs(match_labels=dict(pod_label)),
            update_strategy=DaemonSetUpdateStrategyArgs(type="RollingUpdate"),
            template=PodTemplateSpecArgs(
                metadata=ObjectMetaArgs(labels=dict(pod_label)),
                spec=pod_spec,
            ),
        ),
    )

    exported_names = {
        "config_map": config_map.metadata["name"],
        "daemon_set": daemon_set.metadata["name"],
    }
    pulumi.export("nvidia-device-plugin", exported_names)

    return daemon_set
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment