GPU Sharing on GKE DaemonSet

NOTE: This is not an officially supported Google product.

Example Usage

  1. Create a GKE cluster with a GPU node pool:
gcloud container clusters create gpu-sharing-demo --zone us-central1-c
gcloud container node-pools create gpu --cluster gpu-sharing-demo --zone us-central1-c --num-nodes=1 --accelerator type=nvidia-tesla-p4,count=1
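Once the node pool is up, you can confirm that the GPU node registered with the accelerator label (the same label the DaemonSet below selects on):

kubectl get nodes -l cloud.google.com/gke-accelerator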
  2. Apply the DaemonSet to enable GPU sharing:
kubectl apply -f https://gist.githubusercontent.com/creatorrr/daa61e91e50da3ed1101a365980907f1/raw/f21c55b4f7a4b22fd83c3624a6013055782235dc/gpu-sharing-daemonset.yaml
  3. Apply the DaemonSet to install the NVIDIA GPU driver per the docs:
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml
  4. Wait for the driver installer pod to become ready:
kubectl -n kube-system wait pod -l k8s-app=nvidia-driver-installer --for=condition=Ready --timeout=600s
  5. Wait for the gpu-sharing pod to become ready:
kubectl -n kube-system wait pod -l app=gpu-sharing --for=condition=Ready --timeout=600s
  6. Verify that GPU sharing is working:
kubectl describe node -l cloud.google.com/gke-accelerator | grep nvidia.com/gpu

Example output:

 nvidia.com/gpu:             16
 nvidia.com/gpu:             16
  nvidia.com/gpu             0               0
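The three lines are the node's Capacity, Allocatable, and Allocated nvidia.com/gpu values: the device plugin counts the real /dev/nvidia0 plus the 15 symlinks created by the DaemonSet, so the node advertises 16 schedulable GPUs. To inspect the device files on the node directly, one option (a sketch; substitute your node's name for NODE_NAME) is:

gcloud compute ssh NODE_NAME --zone us-central1-c --command "ls -l /dev/nvidia*"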

NOTE: If GPU sharing is not working, try restarting the nvidia-gpu-device-plugin pods in the kube-system namespace.
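For example, assuming the managed device plugin pods carry GKE's usual k8s-app=nvidia-gpu-device-plugin label, deleting them forces a restart:

kubectl -n kube-system delete pod -l k8s-app=nvidia-gpu-device-plugin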

  7. Run several GPU pods on the same node:
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gpu-sharing-test
spec:
  replicas: 6
  selector:
    matchLabels:
      app: gpu-sharing-test
  template:
    metadata:
      labels:
        app: gpu-sharing-test
    spec:
      containers:
        - name: my-gpu-container
          image: nvidia/cuda:10.0-runtime-ubuntu18.04
          command: ["/usr/local/nvidia/bin/nvidia-smi", "-l"]
          resources:
            limits:
              nvidia.com/gpu: 1
EOF
kubectl wait pod -l app=gpu-sharing-test --for=condition=Ready
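Since each of the 6 replicas requests nvidia.com/gpu: 1 against a single shared physical GPU, all pods should land on the one GPU node. You can confirm placement and watch the looping nvidia-smi output with:

kubectl get pods -l app=gpu-sharing-test -o wide
kubectl logs -l app=gpu-sharing-test --tail=5

gpu-sharing-daemonset.yaml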
# Copyright 2019 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gpu-sharing
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: gpu-sharing
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 100%
  template:
    metadata:
      labels:
        app: gpu-sharing
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: cloud.google.com/gke-accelerator
                    operator: Exists
      tolerations:
        - key: "nvidia.com/gpu"
          effect: "NoSchedule"
          operator: "Exists"
      volumes:
        ###
        # dev filesystem from host
        ###
        - name: dev
          hostPath:
            path: /dev
      initContainers:
        ###
        # init container that symlinks /dev/nvidia0 to additional
        # /dev/nvidiaN device files so the device plugin reports
        # NVIDIA_0_SHARE schedulable GPUs on the node
        ###
        - name: gpu-sharing
          securityContext:
            privileged: true
          image: alpine:latest
          resources:
            requests:
              cpu: 0.15
          command: ["/bin/sh"]
          args:
            - -ec
            - |
              # Wait for nvidia0 device.
              echo "Waiting for nvidia0"
              until [[ -e /dev/nvidia0 ]]; do sleep 1; done
              echo "Found nvidia0, creating symlinks"
              # Create symlinks to NVIDIA device to support GPU sharing.
              OLD_DEV=/dev/nvidia0
              for i in $(seq 1 $(($NVIDIA_0_SHARE - 1))); do
                NEW_DEV=/dev/nvidia${i}
                echo "Linking $OLD_DEV -> $NEW_DEV"
                ln -sf $OLD_DEV $NEW_DEV
              done
              echo "Done"
          env:
            - name: NVIDIA_0_SHARE
              value: "16"
          volumeMounts:
            - name: dev
              mountPath: /dev
      containers:
        ###
        # pause container keeps the pod running after the init
        # container completes
        ###
        - image: "gcr.io/google-containers/pause:2.0"
          name: pause
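The number of GPU slots advertised per node is controlled by the NVIDIA_0_SHARE environment variable (16 above); the symlinks only multiply the device files, so all scheduled pods still time-share the memory and compute of the single physical GPU. To try a different share count, one option (a sketch using a JSON patch against the env entry defined above) is:

kubectl -n kube-system patch daemonset gpu-sharing --type=json \
  -p='[{"op": "replace", "path": "/spec/template/spec/initContainers/0/env/0/value", "value": "8"}]'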