Public gists by William Buchwalter (wbuchwalter)
apiVersion: "mlkube.io/v1beta1"
kind: TfJob
metadata:
name: whatev
spec:
replica_specs:
- replicas: 1
tf_port: 2222
tf_replica_type: MASTER
template:
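        # Truncated in the gist preview; a minimal assumed pod template
        # (container image and restart policy are illustrative, not from the original gist):
        spec:
          containers:
            - name: tensorflow
              image: tensorflow/tensorflow:1.3.0
          restartPolicy: OnFailure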
apiVersion: batch/v1
kind: Job
metadata:
  name: nvidia-smi
spec:
  template:
    metadata:
      name: nvidia-smi
    spec:
      restartPolicy: Never
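      # Truncated in the gist preview; an assumed container that runs nvidia-smi to
      # check that the node's GPU driver is visible from inside a pod (image and
      # GPU resource name are illustrative, not from the original gist):
      containers:
        - name: nvidia-smi
          image: nvidia/cuda:8.0-runtime
          command: ["nvidia-smi"]
          resources:
            limits:
              alpha.kubernetes.io/nvidia-gpu: 1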
apiVersion: "mlkube.io/v1beta1"
kind: "TfJob"
metadata:
name: {{ .Release.Name }}
spec:
replica_specs:
- replicas: 1
tf_port: 2222
tf_replica_type: MASTER
template:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  labels:
    app: tensorflow
  name: tensorflow
spec:
  template:
    metadata:
      labels:
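        # Truncated in the gist preview; an assumed continuation with a pod label
        # matching the Deployment and a TensorFlow container (image and port are
        # illustrative, not from the original gist):
        app: tensorflow
    spec:
      containers:
        - name: tensorflow
          image: tensorflow/tensorflow:1.3.0-gpu
          ports:
            - containerPort: 6006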
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: autoscaler
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: autoscaler
apiVersion: v1
kind: Service
metadata:
  labels:
    app: nginx
  name: nginx
spec:
  ports:
    - port: 80
      targetPort: 80
#!/bin/bash
# Install build tools and the kernel headers, then the NVIDIA 375.20 driver,
# so that the node's GPUs can be exposed to containers.
apt-get update
apt-get install -y gcc make
apt-get install -qqy linux-headers-$(uname -r)
wget -P /tools http://us.download.nvidia.com/XFree86/Linux-x86_64/375.20/NVIDIA-Linux-x86_64-375.20.run
chmod +x /tools/NVIDIA-Linux-x86_64-375.20.run
sh /tools/NVIDIA-Linux-x86_64-375.20.run -a -s
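Once the driver is installed on each node, GPU visibility can be checked from inside the cluster, for instance by running the nvidia-smi Job above (the manifest filename here is a placeholder):
kubectl create -f nvidia-smi-job.yaml
kubectl logs job/nvidia-smi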
apiVersion: v1
kind: Service
metadata:
  labels:
    app: tensorboard
  name: tensorboard
spec:
  ports:
    - port: 80
      targetPort: 6006
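  # Truncated in the gist preview; a TensorBoard Service would normally also declare
  # a pod selector and, to expose it externally, a type (assumed, not from the
  # original gist):
  selector:
    app: tensorboard
  type: LoadBalancer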
import os
import numpy as np

def save_model(saver, sess, counter, path):
    """Write a TensorFlow checkpoint for the current training step."""
    if not os.path.isdir(path):
        os.makedirs(path)
    path = os.path.join(path, 'model.ckpt')
    saver.save(sess, path, global_step=counter)
    return path

# Inside the training loop: checkpoint every 50 steps.
if np.mod(counter, 50) == 0:
    save_path = save_model(saver, sess, counter, log_directory)
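As a sketch of the reverse operation, a checkpoint written this way could later be restored with the TensorFlow 1.x Saver API; this assumes the same graph has been rebuilt first and that log_directory is the directory used above:
import tensorflow as tf

# Rebuild (or import) the training graph before creating the Saver, then
# restore the most recent checkpoint written by save_model().
saver = tf.train.Saver()
with tf.Session() as sess:
    latest = tf.train.latest_checkpoint(log_directory)
    saver.restore(sess, latest)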