Skip to content

Instantly share code, notes, and snippets.

@agitter
Last active July 25, 2023 16:15
Show Gist options
  • Save agitter/fef52bdd45cd04004367b02cd0c39d33 to your computer and use it in GitHub Desktop.
Save agitter/fef52bdd45cd04004367b02cd0c39d33 to your computer and use it in GitHub Desktop.
Template for running TensorFlow in a Docker container on CHTC compute nodes
############################################################################
#
# Condor job that runs a TensorFlow Docker container.
# Based on http://chtc.cs.wisc.edu/helloworld.shtml and
# http://research.cs.wisc.edu/htcondor/HTCondorWeek2016/presentations/FriThain_Docker.pdf
# and https://www.tensorflow.org/get_started/os_setup#docker_installation
#
# Assumes the job is submitted from the WID pool, so it
# allows flocking to the CHTC pool.
#
############################################################################
# Wrapper script started for the job; it echoes job information and then
# runs test_tf.py
Executable = ./test_tf.sh
# Run the job inside a Docker container
Universe = docker
# From https://www.tensorflow.org/get_started/os_setup#docker_installation
# Container with the TensorFlow CPU binary image
# Other containers on https://hub.docker.com/
# NOTE(review): no tag is given, so the TensorFlow version presumably depends
# on whatever the registry serves by default -- consider pinning a tag; confirm
Docker_image = tensorflow/tensorflow
# Include the cluster id and process id that are set at runtime so that
# every job gets its own log, stderr, and stdout file
Log = test_tf_$(Cluster)_$(Process).log
Error = test_tf_$(Cluster)_$(Process).err
Output = test_tf_$(Cluster)_$(Process).out
# File transfer generally needed for non-trivial jobs
# See http://chtc.cs.wisc.edu/file-availability.shtml
# "By default, the submit file executable, output, and error files are
# ALWAYS transferred"
# NOTE(review): per the quote above, listing test_tf.sh here looks redundant
# with Executable; test_tf.py is the file that must be listed -- confirm
Transfer_input_files = test_tf.sh, test_tf.py
Should_transfer_files = YES
# test_tf.py writes test_tf_output.txt; presumably it is returned when the
# job exits -- verify against the HTCondor file transfer documentation
When_to_transfer_output = ON_EXIT
# Allow flocking to the larger CHTC pool
+WantFlocking = true
# Specify the resources required
Request_cpus = 1
Request_memory = 16GB
Request_disk = 1GB
# Copy environment variables that are set dynamically by HTCondor.
# cluster, process, and runningon are the variable names test_tf.sh echoes.
# NOTE(review): $$(Name) presumably expands to an attribute of the matched
# machine rather than of the job -- confirm in the condor_submit manual
Environment = "cluster=$(Cluster) process=$(Process) runningon=$$(Name)"
# Submit a single job with the settings above
Queue
#END
# From https://www.tensorflow.org/get_started/os_setup#test_the_tensorflow_installation
# Smoke test for a TensorFlow installation: evaluates a string constant and an
# integer sum, prints both, and writes a third result to test_tf_output.txt.
import tensorflow as tf

# TensorFlow 2.x removed the top-level tf.Session and executes eagerly by
# default. Use the tf.compat.v1 graph-mode API when it is available and fall
# back to the top-level names on older 1.x releases that predate it.
# Eager execution must be disabled before any tensors are created.
try:
    tf_v1 = tf.compat.v1
    tf_v1.disable_eager_execution()
except AttributeError:
    tf_v1 = tf

# The context manager closes the session when the block exits.
with tf_v1.Session() as sess:
    hello = tf.constant('Hello, TensorFlow!')
    print(sess.run(hello))
    a = tf.constant(10)
    b = tf.constant(32)
    print(sess.run(a + b))
    # Test file writing and HTCondor file transfer
    c = tf.constant(1)
    d = tf.constant(10)
    with open('test_tf_output.txt', 'w') as out_file:
        out_file.write('TensorFlow output: {}'.format(sess.run(c + d)))
#!/bin/bash
# Wrapper run as the job executable: prints HTCondor job information and the
# Python environment, then runs the TensorFlow test script. The exit status
# of this script is that of the final python command.

# HTCondor job information
# cluster, process, and runningon are passed in through the submit file's
# Environment setting. Expansions are quoted so that values containing
# whitespace or glob characters are printed verbatim.
echo "_CONDOR_JOB_IWD ${_CONDOR_JOB_IWD}"
echo "Cluster ${cluster}"
echo "Process ${process}"
echo "RunningOn ${runningon}"

# Check environment: show the search path and which python will be used
echo "${PATH}"
which python

python test_tf.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment