Skip to content

Instantly share code, notes, and snippets.

@hsinhoyeh
Created October 12, 2023 15:23
Show Gist options
  • Save hsinhoyeh/36e6b17b167914a1953a5be13c6a6eff to your computer and use it in GitHub Desktop.
Save hsinhoyeh/36e6b17b167914a1953a5be13c6a6eff to your computer and use it in GitHub Desktop.
apiVersion: v1
kind: Namespace
metadata:
labels:
kubernetes.io/metadata.name: gpu-operator
name: gpu-operator
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.8.0
creationTimestamp: null
name: clusterpolicies.nvidia.com
spec:
group: nvidia.com
names:
kind: ClusterPolicy
listKind: ClusterPolicyList
plural: clusterpolicies
singular: clusterpolicy
scope: Cluster
versions:
- additionalPrinterColumns:
- jsonPath: .status.state
name: Status
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: string
name: v1
schema:
openAPIV3Schema:
description: ClusterPolicy is the Schema for the clusterpolicies API
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: ClusterPolicySpec defines the desired state of ClusterPolicy
properties:
cdi:
description: CDI configures how the Container Device Interface is
used in the cluster
properties:
default:
default: false
description: Default indicates whether to use CDI as the default
mechanism for providing GPU access to containers.
type: boolean
enabled:
default: false
description: Enabled indicates whether CDI can be used to make
GPUs accessible to containers.
type: boolean
type: object
daemonsets:
description: Daemonset defines common configuration for all Daemonsets
properties:
annotations:
additionalProperties:
type: string
description: 'Optional: Annotations is an unstructured key value
map stored with a resource that may be set by external tools
to store and retrieve arbitrary metadata. They are not queryable
and should be preserved when modifying objects.'
type: object
labels:
additionalProperties:
type: string
description: 'Optional: Map of string keys and values that can
be used to organize and categorize (scope and select) objects.
May match selectors of replication controllers and services.'
type: object
priorityClassName:
type: string
rollingUpdate:
description: 'Optional: Configuration for rolling update of all
DaemonSet pods'
properties:
maxUnavailable:
type: string
type: object
tolerations:
description: 'Optional: Set tolerations'
items:
description: The pod this Toleration is attached to tolerates
any taint that matches the triple <key,value,effect> using
the matching operator <operator>.
properties:
effect:
description: Effect indicates the taint effect to match.
Empty means match all taint effects. When specified, allowed
values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: Key is the taint key that the toleration applies
to. Empty means match all taint keys. If the key is empty,
operator must be Exists; this combination means to match
all values and all keys.
type: string
operator:
description: Operator represents a key's relationship to
the value. Valid operators are Exists and Equal. Defaults
to Equal. Exists is equivalent to wildcard for value,
so that a pod can tolerate all taints of a particular
category.
type: string
tolerationSeconds:
description: TolerationSeconds represents the period of
time the toleration (which must be of effect NoExecute,
otherwise this field is ignored) tolerates the taint.
By default, it is not set, which means tolerate the taint
forever (do not evict). Zero and negative values will
be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: Value is the taint value the toleration matches
to. If the operator is Exists, the value should be empty,
otherwise just a regular string.
type: string
type: object
type: array
updateStrategy:
default: RollingUpdate
enum:
- RollingUpdate
- OnDelete
type: string
type: object
dcgm:
description: DCGM component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
as a separate pod is enabled.
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
hostPort:
description: 'HostPort represents host port that needs to be bound
for DCGM engine (Default: 5555)'
format: int32
type: integer
image:
description: NVIDIA DCGM image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA DCGM image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA DCGM image tag
type: string
type: object
dcgmExporter:
description: DCGMExporter spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
config:
description: 'Optional: Custom metrics configuration for NVIDIA
DCGM Exporter'
properties:
name:
description: ConfigMap name with file dcgm-metrics.csv for
metrics to be collected by NVIDIA DCGM Exporter
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA DCGM Exporter
through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA DCGM Exporter image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA DCGM Exporter image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
serviceMonitor:
description: 'Optional: ServiceMonitor configuration for NVIDIA
DCGM Exporter'
properties:
additionalLabels:
additionalProperties:
type: string
description: AdditionalLabels to add to ServiceMonitor instance
for NVIDIA DCGM Exporter
type: object
enabled:
description: Enabled indicates if ServiceMonitor is deployed
for NVIDIA DCGM Exporter
type: boolean
honorLabels:
description: HonorLabels chooses the metric’s labels on collisions
with target labels.
type: boolean
interval:
description: 'Interval which metrics should be scraped from
NVIDIA DCGM Exporter. If not specified Prometheus’ global
scrape interval is used. Supported units: y, w, d, h, m,
s, ms'
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object
version:
description: NVIDIA DCGM Exporter image tag
type: string
type: object
devicePlugin:
description: DevicePlugin component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
config:
description: 'Optional: Configuration for the NVIDIA Device Plugin
via the ConfigMap'
properties:
default:
description: Default config name within the ConfigMap for
the NVIDIA Device Plugin config
type: string
name:
description: ConfigMap name for NVIDIA Device Plugin config
including shared config between plugin and GFD
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA Device
Plugin through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA Device Plugin image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA Device Plugin image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA Device Plugin image tag
type: string
type: object
driver:
description: Driver component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
certConfig:
description: 'Optional: Custom certificates configuration for
NVIDIA Driver container'
properties:
name:
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA Driver
through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA Driver image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
kernelModuleConfig:
description: 'Optional: Kernel module configuration parameters
for the NVIDIA Driver'
properties:
name:
type: string
type: object
licensingConfig:
description: 'Optional: Licensing configuration for NVIDIA vGPU
licensing'
properties:
configMapName:
type: string
nlsEnabled:
description: NLSEnabled indicates if NVIDIA Licensing System
is used for licensing.
type: boolean
type: object
livenessProbe:
description: NVIDIA Driver container liveness probe settings
properties:
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
periodSeconds:
description: How often (in seconds) to perform the probe.
Default to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
minimum: 1
type: integer
type: object
manager:
description: Manager represents configuration for NVIDIA Driver
Manager initContainer
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: Image represents NVIDIA Driver Manager image
name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Repository represents Driver Managerrepository
path
type: string
version:
description: Version represents NVIDIA Driver Manager image
tag(version)
type: string
type: object
rdma:
description: GPUDirectRDMASpec defines the properties for nvidia-peermem
deployment
properties:
enabled:
description: Enabled indicates if GPUDirect RDMA is enabled
through GPU operator
type: boolean
useHostMofed:
description: UseHostMOFED indicates to use MOFED drivers directly
installed on the host to enable GPUDirect RDMA
type: boolean
type: object
readinessProbe:
description: NVIDIA Driver container readiness probe settings
properties:
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
periodSeconds:
description: How often (in seconds) to perform the probe.
Default to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
minimum: 1
type: integer
type: object
repoConfig:
description: 'Optional: Custom repo configuration for NVIDIA Driver
container'
properties:
configMapName:
type: string
type: object
repository:
description: NVIDIA Driver image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
startupProbe:
description: NVIDIA Driver container startup probe settings
properties:
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
3. Minimum value is 1.
format: int32
minimum: 1
type: integer
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
periodSeconds:
description: How often (in seconds) to perform the probe.
Default to 10 seconds. Minimum value is 1.
format: int32
minimum: 1
type: integer
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
minimum: 1
type: integer
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
minimum: 1
type: integer
type: object
upgradePolicy:
description: Driver auto-upgrade settings
properties:
autoUpgrade:
default: false
description: AutoUpgrade is a global switch for automatic
upgrade feature if set to false all other options are ignored
type: boolean
drain:
description: DrainSpec describes configuration for node drain
during automatic upgrade
properties:
deleteEmptyDir:
default: false
description: DeleteEmptyDir indicates if should continue
even if there are pods using emptyDir (local data that
will be deleted when the node is drained)
type: boolean
enable:
default: false
description: Enable indicates if node draining is allowed
during upgrade
type: boolean
force:
default: false
description: Force indicates if force draining is allowed
type: boolean
podSelector:
description: 'PodSelector specifies a label selector to
filter pods on the node that need to be drained For
more details on label selectors, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors'
type: string
timeoutSeconds:
default: 300
description: TimeoutSecond specifies the length of time
in seconds to wait before giving up drain, zero means
infinite
minimum: 0
type: integer
type: object
maxParallelUpgrades:
default: 1
description: MaxParallelUpgrades indicates how many nodes
can be upgraded in parallel 0 means no limit, all nodes
will be upgraded in parallel
minimum: 0
type: integer
maxUnavailable:
anyOf:
- type: integer
- type: string
default: 25%
description: 'MaxUnavailable is the maximum number of nodes
with the driver installed, that can be unavailable during
the upgrade. Value can be an absolute number (ex: 5) or
a percentage of total nodes at the start of upgrade (ex:
10%). Absolute number is calculated from percentage by rounding
up. By default, a fixed value of 25% is used.'
x-kubernetes-int-or-string: true
podDeletion:
description: PodDeletionSpec describes configuration for deletion
of pods using special resources during automatic upgrade
properties:
deleteEmptyDir:
default: false
description: DeleteEmptyDir indicates if should continue
even if there are pods using emptyDir (local data that
will be deleted when the pod is deleted)
type: boolean
force:
default: false
description: Force indicates if force deletion is allowed
type: boolean
timeoutSeconds:
default: 300
description: TimeoutSecond specifies the length of time
in seconds to wait before giving up on pod termination,
zero means infinite
minimum: 0
type: integer
type: object
waitForCompletion:
description: WaitForCompletionSpec describes the configuration
for waiting on job completions
properties:
podSelector:
description: 'PodSelector specifies a label selector for
the pods to wait for completion For more details on
label selectors, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors'
type: string
timeoutSeconds:
default: 0
description: TimeoutSecond specifies the length of time
in seconds to wait before giving up on pod termination,
zero means infinite
minimum: 0
type: integer
type: object
type: object
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA
Driver using pre-compiled modules is enabled
type: boolean
version:
description: NVIDIA Driver image tag
type: string
virtualTopology:
description: 'Optional: Virtual Topology Daemon configuration
for NVIDIA vGPU drivers'
properties:
config:
description: 'Optional: Config name representing virtual topology
daemon configuration file nvidia-topologyd.conf'
type: string
type: object
type: object
gds:
description: GPUDirectStorage defines the spec for GDS components(Experimental)
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if GPUDirect Storage is enabled
through GPU operator
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA GPUDirect Storage Driver image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA GPUDirect Storage Driver image repository
type: string
version:
description: NVIDIA GPUDirect Storage Driver image tag
type: string
type: object
gfd:
description: GPUFeatureDiscovery spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if deployment of GPU Feature Discovery
Plugin is enabled.
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: GFD image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: GFD image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: GFD image tag
type: string
type: object
mig:
description: MIG spec
properties:
strategy:
description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
Device Plugin'
enum:
- none
- single
- mixed
type: string
type: object
migManager:
description: MIGManager for configuration to deploy MIG Manager
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
config:
description: 'Optional: Custom mig-parted configuration for NVIDIA
MIG Manager container'
properties:
default:
default: all-disabled
description: Default MIG config to be applied on the node,
when there is no config specified with the node label nvidia.com/mig.config
enum:
- all-disabled
- ""
type: string
name:
default: default-mig-parted-config
description: ConfigMap name
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA MIG Manager
is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
gpuClientsConfig:
description: 'Optional: Custom gpu-clients configuration for NVIDIA
MIG Manager container'
properties:
name:
description: ConfigMap name
type: string
type: object
image:
description: NVIDIA MIG Manager image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA MIG Manager image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA MIG Manager image tag
type: string
type: object
nodeStatusExporter:
description: NodeStatusExporter spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if deployment of Node Status Exporter
is enabled.
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: Node Status Exporter image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Node Status Exporterimage repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: Node Status Exporterimage tag
type: string
type: object
operator:
description: Operator component spec
properties:
annotations:
additionalProperties:
type: string
description: 'Optional: Annotations is an unstructured key value
map stored with a resource that may be set by external tools
to store and retrieve arbitrary metadata. They are not queryable
and should be preserved when modifying objects.'
type: object
defaultRuntime:
default: docker
description: Runtime defines container runtime type
enum:
- docker
- crio
- containerd
type: string
initContainer:
description: InitContainerSpec describes configuration for initContainer
image used with all components
properties:
image:
description: Image represents image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Repository represents image repository path
type: string
version:
description: Version represents image tag(version)
type: string
type: object
labels:
additionalProperties:
type: string
description: 'Optional: Map of string keys and values that can
be used to organize and categorize (scope and select) objects.
May match selectors of replication controllers and services.'
type: object
runtimeClass:
default: nvidia
type: string
use_ocp_driver_toolkit:
description: UseOpenShiftDriverToolkit indicates if DriverToolkit
image should be used on OpenShift to build and install driver
modules
type: boolean
required:
- defaultRuntime
type: object
psp:
description: PSP defines spec for handling PodSecurityPolicies
properties:
enabled:
description: Enabled indicates if PodSecurityPolicies needs to
be enabled for all Pods
type: boolean
type: object
sandboxDevicePlugin:
description: SandboxDevicePlugin component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if deployment of NVIDIA Sandbox
Device Plugin through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA Sandbox Device Plugin image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA Sandbox Device Plugin image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA Sandbox Device Plugin image tag
type: string
type: object
sandboxWorkloads:
description: SandboxWorkloads defines the spec for handling sandbox
workloads (i.e. Virtual Machines)
properties:
defaultWorkload:
default: container
description: DefaultWorkload indicates the default GPU workload
type to configure worker nodes in the cluster for
enum:
- container
- vm-passthrough
- vm-vgpu
type: string
enabled:
description: Enabled indicates if the GPU Operator should manage
additional operands required for sandbox workloads (i.e. VFIO
Manager, vGPU Manager, and additional device plugins)
type: boolean
type: object
toolkit:
description: Toolkit component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
enabled:
description: Enabled indicates if deployment of NVIDIA Container
Toolkit through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA Container Toolkit image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
installDir:
default: /usr/local/nvidia
description: Toolkit install directory on the host
type: string
repository:
description: NVIDIA Container Toolkit image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA Container Toolkit image tag
type: string
type: object
validator:
description: Validator defines the spec for operator-validator daemonset
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
cuda:
description: CUDA validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
driver:
description: Toolkit validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: Validator image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
plugin:
description: Plugin validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
repository:
description: Validator image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
toolkit:
description: Toolkit validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
version:
description: Validator image tag
type: string
vfioPCI:
description: VfioPCI validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
vgpuDevices:
description: VGPUDevices validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
vgpuManager:
description: VGPUManager validator spec
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
type: object
type: object
vfioManager:
description: VFIOManager for configuration to deploy VFIO-PCI Manager
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
driverManager:
description: DriverManager represents configuration for NVIDIA
Driver Manager
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: Image represents NVIDIA Driver Manager image
name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Repository represents Driver Managerrepository
path
type: string
version:
description: Version represents NVIDIA Driver Manager image
tag(version)
type: string
type: object
enabled:
description: Enabled indicates if deployment of VFIO Manager is
enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: VFIO Manager image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: VFIO Manager image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: VFIO Manager image tag
type: string
type: object
vgpuDeviceManager:
description: VGPUDeviceManager spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
config:
description: NVIDIA vGPU devices configuration for NVIDIA vGPU
Device Manager container
properties:
default:
default: default
description: Default config name within the ConfigMap
type: string
name:
description: ConfigMap name
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA vGPU Device
Manager is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA vGPU Device Manager image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA vGPU Device Manager image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA vGPU Device Manager image tag
type: string
type: object
vgpuManager:
description: VGPUManager component spec
properties:
args:
description: 'Optional: List of arguments'
items:
type: string
type: array
driverManager:
description: DriverManager represents configuration for NVIDIA
Driver Manager initContainer
properties:
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must
be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables
in the container and any service environment variables.
If a variable cannot be resolved, the reference in
the input string will be unchanged. Double $$ are
reduced to a single $, which allows for escaping the
$(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce
the string literal "$(VAR_NAME)". Escaped references
will never be expanded, regardless of whether the
variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or
its key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports
metadata.name, metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in
the specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container:
only resources limits and requests (limits.cpu,
limits.memory, limits.ephemeral-storage, requests.cpu,
requests.memory and requests.ephemeral-storage)
are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of
the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: Image represents NVIDIA Driver Manager image
name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: Repository represents Driver Managerrepository
path
type: string
version:
description: Version represents NVIDIA Driver Manager image
tag(version)
type: string
type: object
enabled:
description: Enabled indicates if deployment of NVIDIA vGPU Manager
through operator is enabled
type: boolean
env:
description: 'Optional: List of environment variables'
items:
description: EnvVar represents an environment variable present
in a Container.
properties:
name:
description: Name of the environment variable. Must be a
C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in
the container and any service environment variables. If
a variable cannot be resolved, the reference in the input
string will be unchanged. Double $$ are reduced to a single
$, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless
of whether the variable exists or not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value.
Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`,
`metadata.annotations[''<KEY>'']`, spec.nodeName,
spec.serviceAccountName, status.hostIP, status.podIP,
status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the
specified API version.
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
secretKeyRef:
description: Selects a key of a secret in the pod's
namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
image:
description: NVIDIA vGPU Manager image name
pattern: '[a-zA-Z0-9\-]+'
type: string
imagePullPolicy:
description: Image pull policy
type: string
imagePullSecrets:
description: Image pull secrets
items:
type: string
type: array
repository:
description: NVIDIA vGPU Manager image repository
type: string
resources:
description: 'Optional: Define resources requests and limits for
each pod'
properties:
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
version:
description: NVIDIA vGPU Manager image tag
type: string
type: object
required:
- daemonsets
- dcgm
- dcgmExporter
- devicePlugin
- driver
- gfd
- nodeStatusExporter
- operator
- toolkit
type: object
status:
description: ClusterPolicyStatus defines the observed state of ClusterPolicy
properties:
namespace:
description: Namespace indicates a namespace in which the operator
is installed
type: string
state:
description: State indicates status of ClusterPolicy
enum:
- ignored
- ready
- notReady
type: string
required:
- state
type: object
type: object
served: true
storage: true
subresources:
status: {}
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.9.2
creationTimestamp: null
name: nodefeaturerules.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
names:
kind: NodeFeatureRule
listKind: NodeFeatureRuleList
plural: nodefeaturerules
shortNames:
- nfr
singular: nodefeaturerule
scope: Cluster
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
description: NodeFeatureRule resource specifies a configuration for feature-based
customization of node objects, such as node labeling.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: NodeFeatureRuleSpec describes a NodeFeatureRule.
properties:
rules:
description: Rules is a list of node customization rules.
items:
description: Rule defines a rule for node customization such as
labeling.
properties:
labels:
additionalProperties:
type: string
description: Labels to create if the rule matches.
type: object
labelsTemplate:
description: LabelsTemplate specifies a template to expand for
dynamically generating multiple labels. Data (after template
expansion) must be keys with an optional value (<key>[=<value>])
separated by newlines.
type: string
matchAny:
description: MatchAny specifies a list of matchers one of which
must match.
items:
description: MatchAnyElem specifies one sub-matcher of MatchAny.
properties:
matchFeatures:
description: MatchFeatures specifies a set of matcher
terms all of which must match.
items:
description: FeatureMatcherTerm defines requirements
against one feature set. All requirements (specified
as MatchExpressions) are evaluated against each element
in the feature set.
properties:
feature:
type: string
matchExpressions:
additionalProperties:
description: "MatchExpression specifies an expression
to evaluate against a set of input values. It
contains an operator that is applied when matching
the input and an array of values that the operator
evaluates the input against. \n NB: CreateMatchExpression
or MustCreateMatchExpression() should be used
for creating new instances. \n NB: Validate()
must be called if Op or Value fields are modified
or if a new instance is created from scratch
without using the helper functions."
properties:
op:
description: Op is the operator to be applied.
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
description: Value is the list of values that
the operand evaluates the input against.
Value should be empty if the operator is
Exists, DoesNotExist, IsTrue or IsFalse.
Value should contain exactly one element
if the operator is Gt or Lt and exactly
two elements if the operator is GtLt. In
other cases Value should contain at least
one element.
items:
type: string
type: array
required:
- op
type: object
description: MatchExpressionSet contains a set of
MatchExpressions, each of which is evaluated against
a set of input values.
type: object
required:
- feature
- matchExpressions
type: object
type: array
required:
- matchFeatures
type: object
type: array
matchFeatures:
description: MatchFeatures specifies a set of matcher terms
all of which must match.
items:
description: FeatureMatcherTerm defines requirements against
one feature set. All requirements (specified as MatchExpressions)
are evaluated against each element in the feature set.
properties:
feature:
type: string
matchExpressions:
additionalProperties:
description: "MatchExpression specifies an expression
to evaluate against a set of input values. It contains
an operator that is applied when matching the input
and an array of values that the operator evaluates
the input against. \n NB: CreateMatchExpression or
MustCreateMatchExpression() should be used for creating
new instances. \n NB: Validate() must be called if
Op or Value fields are modified or if a new instance
is created from scratch without using the helper functions."
properties:
op:
description: Op is the operator to be applied.
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
description: Value is the list of values that the
operand evaluates the input against. Value should
be empty if the operator is Exists, DoesNotExist,
IsTrue or IsFalse. Value should contain exactly
one element if the operator is Gt or Lt and exactly
two elements if the operator is GtLt. In other
cases Value should contain at least one element.
items:
type: string
type: array
required:
- op
type: object
description: MatchExpressionSet contains a set of MatchExpressions,
each of which is evaluated against a set of input values.
type: object
required:
- feature
- matchExpressions
type: object
type: array
name:
description: Name of the rule.
type: string
taints:
description: Taints to create if the rule matches.
items:
description: The node this Taint is attached to has the "effect"
on any pod that does not tolerate the Taint.
properties:
effect:
description: Required. The effect of the taint on pods
that do not tolerate the taint. Valid effects are NoSchedule,
PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
description: TimeAdded represents the time at which the
taint was added. It is only written for NoExecute taints.
format: date-time
type: string
value:
description: The taint value corresponding to the taint
key.
type: string
required:
- effect
- key
type: object
type: array
vars:
additionalProperties:
type: string
description: Vars is the variables to store if the rule matches.
Variables do not directly inflict any changes in the node
object. However, they can be referenced from other rules enabling
more complex rule hierarchies, without exposing intermediary
output values as labels.
type: object
varsTemplate:
description: VarsTemplate specifies a template to expand for
dynamically generating multiple variables. Data (after template
expansion) must be keys with an optional value (<key>[=<value>])
separated by newlines.
type: string
required:
- name
type: object
type: array
required:
- rules
type: object
required:
- spec
type: object
served: true
storage: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.9.2
creationTimestamp: null
name: nodefeatures.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
names:
kind: NodeFeature
listKind: NodeFeatureList
plural: nodefeatures
singular: nodefeature
scope: Namespaced
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
description: NodeFeature resource holds the features discovered for one node
in the cluster.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: NodeFeatureSpec describes a NodeFeature object.
properties:
features:
description: Features is the full "raw" features data that has been
discovered.
properties:
attributes:
additionalProperties:
description: AttributeFeatureSet is a set of features having
string value.
properties:
elements:
additionalProperties:
type: string
type: object
required:
- elements
type: object
type: object
flags:
additionalProperties:
description: FlagFeatureSet is a set of simple features only
containing names without values.
properties:
elements:
additionalProperties:
description: Nil is a dummy empty struct for protobuf
compatibility
type: object
type: object
required:
- elements
type: object
type: object
instances:
additionalProperties:
description: InstanceFeatureSet is a set of features each of
which is an instance having multiple attributes.
properties:
elements:
items:
description: InstanceFeature represents one instance of
a complex features, e.g. a device.
properties:
attributes:
additionalProperties:
type: string
type: object
required:
- attributes
type: object
type: array
required:
- elements
type: object
type: object
required:
- attributes
- flags
- instances
type: object
labels:
additionalProperties:
type: string
description: Labels is the set of node labels that are requested to
be created.
type: object
required:
- features
type: object
required:
- spec
type: object
served: true
storage: true
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
name: gpu-operator
namespace: gpu-operator
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: node-feature-discovery
namespace: gpu-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery-worker
namespace: gpu-operator
rules:
- apiGroups:
- nfd.k8s-sigs.io
resources:
- nodefeatures
verbs:
- create
- get
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
name: gpu-operator
rules:
- apiGroups:
- config.openshift.io
resources:
- proxies
verbs:
- get
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
- clusterroles
- clusterrolebindings
verbs:
- '*'
- apiGroups:
- ""
resources:
- pods
- services
- endpoints
- persistentvolumeclaims
- events
- configmaps
- secrets
- serviceaccounts
- nodes
verbs:
- '*'
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- list
- create
- watch
- update
- patch
- apiGroups:
- apps
resources:
- deployments
- daemonsets
- replicasets
- statefulsets
verbs:
- '*'
- apiGroups:
- apps
resources:
- controllerrevisions
verbs:
- get
- list
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
- prometheusrules
verbs:
- get
- list
- create
- watch
- update
- delete
- apiGroups:
- nvidia.com
resources:
- '*'
verbs:
- '*'
- apiGroups:
- scheduling.k8s.io
resources:
- priorityclasses
verbs:
- get
- list
- watch
- create
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
verbs:
- '*'
- apiGroups:
- policy
resourceNames:
- gpu-operator-restricted
resources:
- podsecuritypolicies
verbs:
- use
- apiGroups:
- policy
resources:
- podsecuritypolicies
verbs:
- create
- get
- update
- list
- delete
- apiGroups:
- config.openshift.io
resources:
- clusterversions
verbs:
- get
- list
- watch
- apiGroups:
- ""
- coordination.k8s.io
resources:
- configmaps
- leases
verbs:
- get
- list
- watch
- create
- update
- patch
- delete
- apiGroups:
- node.k8s.io
resources:
- runtimeclasses
verbs:
- get
- list
- create
- update
- watch
- delete
- apiGroups:
- image.openshift.io
resources:
- imagestreams
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
- pods/eviction
verbs:
- get
- list
- watch
- create
- delete
- update
- patch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- create
- update
- patch
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- get
- list
- watch
- update
- patch
- create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- patch
- update
- list
- apiGroups:
- ""
resources:
- nodes/proxy
verbs:
- get
- apiGroups:
- nfd.k8s-sigs.io
resources:
- nodefeatures
- nodefeaturerules
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery-worker
namespace: gpu-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: nvidia-gpu-operator-node-feature-discovery-worker
subjects:
- kind: ServiceAccount
name: node-feature-discovery
namespace: gpu-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
name: gpu-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gpu-operator
subjects:
- kind: ServiceAccount
name: gpu-operator
namespace: gpu-operator
- kind: ServiceAccount
name: node-feature-discovery
namespace: gpu-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: nvidia-gpu-operator-node-feature-discovery
subjects:
- kind: ServiceAccount
name: node-feature-discovery
namespace: gpu-operator
---
apiVersion: v1
data:
nfd-topology-updater.conf: "null"
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery-topology-updater-conf
namespace: gpu-operator
---
apiVersion: v1
data:
nfd-worker.conf: |-
sources:
pci:
deviceClassWhitelist:
- "02"
- "0200"
- "0207"
- "0300"
- "0302"
deviceLabelFields:
- vendor
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
name: nvidia-gpu-operator-node-feature-discovery-worker-conf
namespace: gpu-operator
---
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
role: master
name: nvidia-gpu-operator-node-feature-discovery-master
namespace: gpu-operator
spec:
ports:
- name: grpc
port: 8080
protocol: TCP
targetPort: grpc
selector:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/name: node-feature-discovery
role: master
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
nvidia.com/gpu-driver-upgrade-drain.skip: "true"
name: gpu-operator
namespace: gpu-operator
spec:
replicas: 1
selector:
matchLabels:
app: gpu-operator
app.kubernetes.io/component: gpu-operator
template:
metadata:
annotations:
openshift.io/scc: restricted-readonly
labels:
app: gpu-operator
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
nvidia.com/gpu-driver-upgrade-drain.skip: "true"
spec:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- preference:
matchExpressions:
- key: node-role.kubernetes.io/master
operator: In
values:
- ""
weight: 1
containers:
- args:
- --leader-elect
- --zap-time-encoding=epoch
- --zap-log-level=info
command:
- gpu-operator
env:
- name: WATCH_NAMESPACE
value: ""
- name: OPERATOR_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: nvcr.io/nvidia/gpu-operator:v23.3.2
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
path: /healthz
port: 8081
initialDelaySeconds: 15
periodSeconds: 20
name: gpu-operator
ports:
- containerPort: 8080
name: metrics
readinessProbe:
httpGet:
path: /readyz
port: 8081
initialDelaySeconds: 5
periodSeconds: 10
resources:
limits:
cpu: 500m
memory: 350Mi
requests:
cpu: 200m
memory: 100Mi
volumeMounts:
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
priorityClassName: system-node-critical
serviceAccountName: gpu-operator
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Equal
value: ""
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Equal
value: ""
volumes:
- hostPath:
path: /etc/os-release
name: host-os-release
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
role: master
name: nvidia-gpu-operator-node-feature-discovery-master
namespace: gpu-operator
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/name: node-feature-discovery
role: master
template:
metadata:
annotations: {}
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/name: node-feature-discovery
role: master
spec:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- preference:
matchExpressions:
- key: node-role.kubernetes.io/master
operator: In
values:
- ""
weight: 1
- preference:
matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: In
values:
- ""
weight: 1
containers:
- args:
- -enable-nodefeature-api
- --extra-label-ns=nvidia.com
- -featurerules-controller=true
command:
- nfd-master
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
image: registry.k8s.io/nfd/node-feature-discovery:v0.12.1
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
command:
- /usr/bin/grpc_health_probe
- -addr=:8080
initialDelaySeconds: 10
periodSeconds: 10
name: master
ports:
- containerPort: 8080
name: grpc
readinessProbe:
exec:
command:
- /usr/bin/grpc_health_probe
- -addr=:8080
failureThreshold: 10
initialDelaySeconds: 5
periodSeconds: 10
resources: {}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
securityContext: {}
serviceAccountName: node-feature-discovery
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Equal
value: ""
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Equal
value: ""
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: node-feature-discovery
app.kubernetes.io/version: v0.12.1
helm.sh/chart: node-feature-discovery-0.12.1
role: worker
name: nvidia-gpu-operator-node-feature-discovery-worker
namespace: gpu-operator
spec:
selector:
matchLabels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/name: node-feature-discovery
role: worker
template:
metadata:
annotations: {}
labels:
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/name: node-feature-discovery
role: worker
spec:
containers:
- args:
- --server=nvidia-gpu-operator-node-feature-discovery-master:8080
- -enable-nodefeature-api
command:
- nfd-worker
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
image: registry.k8s.io/nfd/node-feature-discovery:v0.12.1
imagePullPolicy: IfNotPresent
name: worker
resources: {}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /host-boot
name: host-boot
readOnly: true
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
- mountPath: /host-sys
name: host-sys
readOnly: true
- mountPath: /host-usr/lib
name: host-usr-lib
readOnly: true
- mountPath: /etc/kubernetes/node-feature-discovery/source.d/
name: source-d
readOnly: true
- mountPath: /etc/kubernetes/node-feature-discovery/features.d/
name: features-d
readOnly: true
- mountPath: /etc/kubernetes/node-feature-discovery
name: nfd-worker-conf
readOnly: true
dnsPolicy: ClusterFirstWithHostNet
securityContext: {}
serviceAccountName: node-feature-discovery
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Equal
value: ""
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
volumes:
- hostPath:
path: /boot
name: host-boot
- hostPath:
path: /etc/os-release
name: host-os-release
- hostPath:
path: /sys
name: host-sys
- hostPath:
path: /usr/lib
name: host-usr-lib
- hostPath:
path: /etc/kubernetes/node-feature-discovery/source.d/
name: source-d
- hostPath:
path: /etc/kubernetes/node-feature-discovery/features.d/
name: features-d
- configMap:
items:
- key: nfd-worker.conf
path: nfd-worker.conf
name: nvidia-gpu-operator-node-feature-discovery-worker-conf
name: nfd-worker-conf
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/name: dcgm-exporter
app.kubernetes.io/version: 2.6.10
release: kube-prometheus-stack
name: dcgm-exporter
namespace: gpu-operator
spec:
endpoints:
- interval: 100ms
path: /metrics
port: gpu-metrics
selector:
matchLabels:
app: nvidia-dcgm-exporter
---
apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
labels:
app.kubernetes.io/component: gpu-operator
app.kubernetes.io/instance: nvidia-gpu-operator
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: gpu-operator
app.kubernetes.io/version: v23.3.2
helm.sh/chart: gpu-operator-v23.3.2
name: cluster-policy
namespace: gpu-operator
spec:
cdi:
default: false
enabled: false
daemonsets:
labels:
app.kubernetes.io/managed-by: gpu-operator
helm.sh/chart: gpu-operator-v23.3.2
priorityClassName: system-node-critical
rollingUpdate:
maxUnavailable: "1"
tolerations:
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
updateStrategy: RollingUpdate
dcgm:
enabled: false
hostPort: 5555
image: dcgm
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: 3.1.7-1-ubuntu20.04
dcgmExporter:
enabled: true
env:
- name: DCGM_EXPORTER_LISTEN
value: :9400
- name: DCGM_EXPORTER_KUBERNETES
value: "true"
- name: DCGM_EXPORTER_COLLECTORS
value: /etc/dcgm-exporter/dcp-metrics-included.csv
image: dcgm-exporter
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/k8s
serviceMonitor:
additionalLabels: {}
enabled: false
honorLabels: false
interval: 15s
version: 3.1.7-3.1.4-ubuntu20.04
devicePlugin:
enabled: true
env:
- name: PASS_DEVICE_SPECS
value: "true"
- name: FAIL_ON_INIT_ERROR
value: "true"
- name: DEVICE_LIST_STRATEGY
value: envvar
- name: DEVICE_ID_STRATEGY
value: uuid
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: all
image: k8s-device-plugin
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: v0.14.0-ubi8
driver:
certConfig:
name: ""
enabled: true
image: driver
imagePullPolicy: IfNotPresent
kernelModuleConfig:
name: ""
licensingConfig:
configMapName: ""
nlsEnabled: false
manager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: "true"
- name: ENABLE_AUTO_DRAIN
value: "false"
- name: DRAIN_USE_FORCE
value: "false"
- name: DRAIN_POD_SELECTOR_LABEL
value: ""
- name: DRAIN_TIMEOUT_SECONDS
value: 0s
- name: DRAIN_DELETE_EMPTYDIR_DATA
value: "false"
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.6.1
rdma:
enabled: false
useHostMofed: false
repoConfig:
configMapName: ""
repository: nvcr.io/nvidia
startupProbe:
failureThreshold: 120
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 60
upgradePolicy:
autoUpgrade: true
drain:
deleteEmptyDir: false
enable: false
force: false
timeoutSeconds: 300
maxParallelUpgrades: 1
maxUnavailable: 25%
podDeletion:
deleteEmptyDir: false
force: false
timeoutSeconds: 300
waitForCompletion:
timeoutSeconds: 0
usePrecompiled: false
version: 525.105.17
virtualTopology:
config: ""
gfd:
enabled: true
env:
- name: GFD_SLEEP_INTERVAL
value: 60s
- name: GFD_FAIL_ON_INIT_ERROR
value: "true"
image: gpu-feature-discovery
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: v0.8.0-ubi8
mig:
strategy: single
migManager:
config:
default: all-disabled
name: default-mig-parted-config
enabled: true
env:
- name: WITH_REBOOT
value: "false"
gpuClientsConfig:
name: ""
image: k8s-mig-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.5.2-ubuntu20.04
nodeStatusExporter:
enabled: false
image: gpu-operator-validator
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v23.3.2
operator:
defaultRuntime: docker
initContainer:
image: cuda
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: 12.1.1-base-ubi8
runtimeClass: nvidia
psp:
enabled: false
sandboxDevicePlugin:
enabled: true
image: kubevirt-gpu-device-plugin
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: v1.2.1
sandboxWorkloads:
defaultWorkload: container
enabled: false
toolkit:
enabled: true
image: container-toolkit
imagePullPolicy: IfNotPresent
installDir: /usr/local/nvidia
repository: nvcr.io/nvidia/k8s
version: v1.13.0-ubuntu20.04
validator:
image: gpu-operator-validator
imagePullPolicy: IfNotPresent
plugin:
env:
- name: WITH_WORKLOAD
value: "true"
repository: nvcr.io/nvidia/cloud-native
version: v23.3.2
vfioManager:
driverManager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
- name: ENABLE_AUTO_DRAIN
value: "false"
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.6.1
enabled: true
image: cuda
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia
version: 12.1.1-base-ubi8
vgpuDeviceManager:
config:
default: default
name: ""
enabled: true
image: vgpu-device-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.2.1
vgpuManager:
driverManager:
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
- name: ENABLE_AUTO_DRAIN
value: "false"
image: k8s-driver-manager
imagePullPolicy: IfNotPresent
repository: nvcr.io/nvidia/cloud-native
version: v0.6.1
enabled: false
image: vgpu-manager
imagePullPolicy: IfNotPresent
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment