OSG site tester
#!/usr/bin/env ruby
# File: mk_test.rb
# Date: 2010-10-06
# Author: Allan Espinosa
# Email: [email protected]
# Description: A Swift workflow generator to test OSG sites through the Engage
# VO. Generates the accompanying tc.data and sites.xml as well.
# Run with "swift -sites.file sites.xml -tc.file tc.data
# test_osg.swift"
require 'erb'
require 'ostruct'
require 'ress'
swift_tc = %q[
localhost echo /bin/echo INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00"
<% ctr = 0
sites.each_key do |name| %>
<% jm = sites[name].jm
   url = sites[name].url
   app_dir = sites[name].app_dir
   data_dir = sites[name].data_dir
   throttle = sites[name].throttle %>
<%=name%> cat<%=ctr%> /bin/cat INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:01:30"
<% ctr += 1
end %>
]
swift_workflow = %q[
type file;

app (file t) echo(string i) {
  echo i stdout=@filename(t);
}
<% ctr = 0
sites.each_key do |name| %>
app (file t) cat<%= ctr %>(file input) {
  cat<%= ctr %> @filename(input) stdout=@filename(t);
}
<% ctr += 1
end %>
<% ctr = 0
sites.each_key do |name| %>
file input<%= ctr %><"cat<%= ctr %>.in">;
input<%= ctr %> = echo("<%= name %>");
file out<%= ctr %><"cat<%= ctr %>.out">;
out<%= ctr %> = cat<%= ctr %>(input<%= ctr %>);
<% ctr += 1
end %>
]
condor_sites = %q[
<config>
  <pool handle="localhost">
    <filesystem provider="local" />
    <execution provider="local" />
    <workdirectory>/var/tmp</workdirectory>
    <profile namespace="karajan" key="jobThrottle">0</profile>
  </pool>
<% sites.each_key do |name| %>
<% jm = sites[name].jm
   url = sites[name].url
   app_dir = sites[name].app_dir
   data_dir = sites[name].data_dir
   throttle = sites[name].throttle %>
  <pool handle="<%=name%>">
    <execution provider="condor" url="none"/>
    <profile namespace="globus" key="jobType">grid</profile>
    <profile namespace="globus" key="gridResource">gt2 <%=url%>/jobmanager-<%=jm%></profile>
    <profile namespace="karajan" key="initialScore">20.0</profile>
    <profile namespace="karajan" key="jobThrottle"><%=throttle%></profile>
    <gridftp url="gsiftp://<%=url%>"/>
    <workdirectory><%=data_dir%>/swift_scratch</workdirectory>
  </pool>
<% end %>
</config>
]
# Blacklist of non-working sites
blacklist = [ ]
# Removes duplicate site entries (i.e., multiple GRAM endpoints)
sites = {}
ress_parse do |name, value|
  next if blacklist.index(name)
  sites[name] = value if sites[name] == nil
end
condor_out = File.open("sites.xml", "w")
tc_out = File.open("tc.data", "w")
swift_out = File.open("test_osg.swift", "w")
condor = ERB.new(condor_sites, 0, "%<>")
tc = ERB.new(swift_tc, 3, "%<>")
swift = ERB.new(swift_workflow, 0, "%<>")
condor_out.puts condor.result(binding)
tc_out.puts tc.result(binding)
swift_out.puts swift.result(binding)
condor_out.close
tc_out.close
swift_out.close
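# For illustration, one generated tc.data entry would look like the line
# below (the site name and gatekeeper host here are hypothetical; real
# values come from the ReSS ClassAds):
#
#   UC_Teraport__tp-grid1.uchicago.edu cat0 /bin/cat INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:01:30"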
# File: ress.rb
# The helper library loaded above via "require 'ress'": queries the Engage
# VO's ReSS collector and parses each site's ClassAds into an OpenStruct.
require 'ostruct'
def ress_query(class_ads)
  cmd = "condor_status -pool engage-submit.renci.org"
  class_ads[0..-2].each do |class_ad|
    cmd << " -format \"%s|\" #{class_ad}"
  end
  cmd << " -format \"%s\\n\" #{class_ads[-1]}"
  `#{cmd}`
end
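# A sketch of the command ress_query builds for the class ads passed in
# from ress_parse below (one -format flag per ClassAd attribute):
#
#   condor_status -pool engage-submit.renci.org \
#     -format "%s|" GlueSiteUniqueID -format "%s|" GlueCEInfoHostName \
#     ... -format "%s\n" GlueCEInfoTotalCPUs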
def ress_parse
  dir_suffix = "/engage/swift"
  class_ads = [
    "GlueSiteUniqueID", "GlueCEInfoHostName", "GlueCEInfoJobManager",
    "GlueCEInfoGatekeeperPort", "GlueCEInfoApplicationDir", "GlueCEInfoDataDir",
    "GlueCEInfoTotalCPUs"
  ]
  ress_query(class_ads).each_line do |line|
    line.chomp!
    set = line.split("|")
    next if set.empty?
    value = OpenStruct.new
    value.jm = set[class_ads.index("GlueCEInfoJobManager")]
    value.url = set[class_ads.index("GlueCEInfoHostName")]
    # Size the throttle to roughly one job per advertised CPU, leaving two
    # CPUs free (Swift's jobThrottle value t allows about t * 100 jobs).
    value.throttle = (set[class_ads.index("GlueCEInfoTotalCPUs")].to_f - 2.0) / 100.0
    # Key on site ID plus gatekeeper host so multiple CEs of one site stay distinct.
    name = set[class_ads.index("GlueSiteUniqueID")] + "__" + value.url
    value.name = set[class_ads.index("GlueSiteUniqueID")]
    value.app_dir = set[class_ads.index("GlueCEInfoApplicationDir")]
    value.app_dir.sub!(/\/$/, "")
    value.data_dir = set[class_ads.index("GlueCEInfoDataDir")]
    value.data_dir.sub!(/\/$/, "")
    # Per-site overrides where the advertised directories need correcting.
    value.app_dir = "/osg/app" if name =~ /GridUNESP_CENTRAL/
    value.data_dir = "/osg/data" if name =~ /GridUNESP_CENTRAL/
    if name =~ /BNL-ATLAS/
      value.app_dir += "/engage-scec"
      value.data_dir += "/engage-scec"
    #elsif name == "LIGO_UWM_NEMO" or name == "SMU_PHY" or name == "UFlorida-HPC" or name == "RENCI-Engagement" or name == "RENCI-Blueridge"
    #  value.app_dir += "/osg/scec"
    #  value.data_dir += "/osg/scec"
    else
      value.app_dir += dir_suffix
      value.data_dir += dir_suffix
    end
    yield name, value
  end
end
# File: swift.properties (Swift engine configuration for the test run)
sites.file=sites.xml
tc.file=tc.data
#
# The host name of the submit machine is used by GRAM as a callback
# address to report the status of submitted jobs. In general, Swift
# can automatically detect the host name of the local machine.
# However, if the machine host name is improperly configured or if
# it does not represent a valid DNS entry, certain services (such as
# GRAM) will not be able to send job status notifications back to
# the client. The value of this property can be an IP address.
#
# Format:
# hostname=string
#
#hostname=localhost
#
# A TCP port range can be specified to restrict the ports on which GRAM
# callback services are started. This is likely needed if your submit
# host is behind a firewall, in which case the firewall should be
# configured to allow incoming connections on ports in the range.
#
# Format:
# tcp.port.range=start,end
#
#tcp.port.range=50000,50100
#
# false - means an error will be immediately reported and cause the
# workflow to abort. At this time remote jobs that are already
# running will not be canceled
# true - means that Swift will try to do as much work as possible and
# report all errors encountered at the end. However, "errors"
# here only applies to job execution errors. Certain errors
# that are related to the Swift implementation (should such
# errors occur) will still be reported eagerly.
#
# Default: false
#
lazy.errors=true
#
# What algorithm to use for caching of remote files. LRU (governing which
# files to purge) is the only implementation right now. One can set
# a target size (in bytes) for a host by using the swift:storagesize
# profile for a host in sites.xml
#
# Default: LRU
#
caching.algorithm=LRU
#
# true - generate a provenance graph in .dot format (Swift will
# choose a random file name)
# false - do not generate a provenance graph
# <filename> - generate a provenance graph in the given file name
#
# Default: false
#
pgraph=false
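#
# For example, to write the graph to a specific file instead of a randomly
# named one (the file name here is only illustrative):
#
#pgraph=provenance.dot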
#
# graph properties for the provenance graph (.dot specific)
#
# Default: splines="compound", rankdir="TB"
#
pgraph.graph.options=splines="compound", rankdir="TB"
#
# node properties for the provenance graph (.dot specific)
#
# Default: color="seagreen", style="filled"
#
pgraph.node.options=color="seagreen", style="filled"
#
# true - clustering of small jobs is enabled. Clustering works in the
# following way: If a job is clusterable (meaning that it has the
# GLOBUS::maxwalltime profile specified in tc.data and its value
# is less than the value of the "clustering.min.time" property) it will
# be put in a clustering queue. The queue is processed at intervals
# specified by the "clustering.queue.delay" property. The processing
# of the clustering queue consists of selecting compatible jobs and
# grouping them in clusters whose max wall time does not exceed twice
# the value of the "clustering.min.time" property. Two or more jobs are
# considered compatible if they share the same site and do not have
# conflicting profiles (e.g. different values for the same environment
# variable).
# false - clustering of small jobs is disabled.
#
# Default: false
#
clustering.enabled=false
#
# <seconds> - the intervals at which the clustering queue is processed
#
# Default: 4
#
clustering.queue.delay=60
#
# <seconds> - the threshold time for clustering
#
# Default: 60
#
clustering.min.time=3600
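#
# Worked example with the values above (assuming clustering.enabled were set
# to true): a job whose GLOBUS::maxwalltime in tc.data is under 3600 seconds
# is considered clusterable; every 60 seconds the queue is processed, and
# compatible jobs are grouped into clusters of at most 2 * 3600 = 7200
# seconds of total wall time.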
#
# Kickstart is a useful tool that can be used to gather various information
# about a remote process. Before it can be used it must be installed on the
# remote site and the corresponding entry be set in the sites file.
# This option controls how Swift uses Kickstart. The following
# values are possible:
# false - do not use Kickstart
# true - use Kickstart. If a job is scheduled on a site that does not have
# Kickstart installed, that job will fail.
# maybe - Use Kickstart if installed (i.e. the entry is present in the sites
# file)
#
# Default: maybe
#
kickstart.enabled=maybe
#
# Indicates when Kickstart records should be fetched from the remote site:
# true - always transfer Kickstart records if Kickstart was used (see
# kickstart.enabled)
# false - only transfer Kickstart records if the job fails
#
# Default: false
#
kickstart.always.transfer=false
#
# Indicates when wrapper logs should be fetched from the remote site:
# true - always transfer wrapper logs
# false - only transfer wrapper logs if the job fails
#
# Default: false
#
wrapperlog.always.transfer=false
###########################################################################
# Throttling options #
###########################################################################
#
# For the throttling parameters, valid values are either a positive integer
# or "off" (without the quotes).
#
#
# Limits the number of concurrent submissions for a workflow instance. This
# throttle only limits the number of concurrent tasks (jobs) that are being
# sent to sites, not the total number of concurrent jobs that can be run.
# The submission stage in GRAM is one of the most CPU expensive stages (due
# mostly to the mutual authentication and delegation). Having too many
# concurrent submissions can overload the submit host CPU, the remote
# host/head node, or both, causing degraded performance.
#
# Default: 4
#
throttle.submit=4
#throttle.submit=off
#
# Limits the number of concurrent submissions for any of the sites Swift will
# try to send jobs to. In other words, it guarantees that, for any given
# site, no more jobs than the value of this throttle will be in the process
# of being submitted concurrently.
#
# Default: 2
#
throttle.host.submit=2
#throttle.host.submit=off
#
# The Swift scheduler has the ability to limit the number of concurrent jobs
# allowed on a site based on the performance history of that site. Each site
# is assigned a score (initially 1), which can increase or decrease based
# on whether the site yields successful or faulty job runs. The score for a
# site can take values in the (0.1, 100) interval. The number of allowed jobs
# is calculated using the following formula:
# 2 + score*throttle.score.job.factor
# This means a site will always be allowed at least two concurrent jobs and
# at most 2 + 100*throttle.score.job.factor. With a default of 4 this means
# at least 2 jobs and at most 402.
#
# Default: 4
#
throttle.score.job.factor=0.2
#throttle.score.job.factor=off
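#
# Worked example with the value above: the best possible site (score 100) is
# allowed at most 2 + 100 * 0.2 = 22 concurrent jobs, instead of the 402
# permitted by the default factor of 4.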
#
# Limits the total number of concurrent file transfers that can happen at any
# given time. File transfers consume bandwidth. Too many concurrent transfers
# can cause the network to be overloaded, preventing various other signalling
# traffic from flowing properly.
#
# Default: 4
#
throttle.transfers=4
#throttle.transfers=off
# Limits the total number of concurrent file operations that can happen at any
# given time. File operations (like transfers) require an exclusive connection
# to a site. These connections can be expensive to establish. A large number
# of concurrent file operations may cause Swift to attempt to establish many
# such expensive connections to various sites. Limiting the number of concurrent
# file operations causes Swift to use a small number of cached connections and
# achieve better overall performance.
#
# Default: 8
#
throttle.file.operations=8
#throttle.file.operations=off
# Indicates whether the working directory on the remote site should be
# left intact even when the workflow completes successfully. This can be
# used to inspect the site working directory for debugging purposes.
#
# Default: false
#
sitedir.keep=false
# Number of times a job will be retried if it fails (giving a maximum of
# 1 + execution.retries attempts at execution).
#
execution.retries=0
# Enables/disables replication. Replication is used to deal with jobs sitting
# in batch queues for abnormally long periods of time. If replication is enabled
# and certain conditions are met, Swift creates and submits replicas of jobs, and
# allows multiple instances of a job to compete.
#
replication.enabled=false
# If replication is enabled, this value specifies the minimum time, in seconds,
# a job needs to be queued in a batch queue in order to be considered for
# replication
#
replication.min.queue.time=180
# The maximum number of replicas that Swift should attempt.
replication.limit=3
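#
# Worked example with the values above (assuming replication.enabled were set
# to true): a job queued remotely for more than 180 seconds becomes a
# candidate for replication, and Swift will submit at most 3 replicas of it.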
#
# WARNING: This option is deprecated. Please use the hostname option.
#
# The IP address of the submit machine is used by GRAM as a callback
# address to report the status of submitted jobs. In general, Swift
# can automatically detect the IP address of the local machine.
# However, if the machine has more than one network interface, Swift
# will pick the first one, which may not be the right choice. It is
# recommended that this property is set properly before attempting to
# run jobs through GRAM.
#
# Format:
# ip.address=x.y.z.w
#
#ip.address=127.0.0.1
# Controls how Swift will communicate the result code of running user programs
# from workers to the submit side. In files mode, a file
# indicating success or failure will be created on the site shared filesystem.
# In provider mode, the execution provider job status will
# be used. Notably, GRAM2 does not return job statuses correctly, and so
# provider mode will not work with GRAM2. With other
# providers, it can be used to reduce the amount of filesystem access compared
# to files mode.
#
status.mode=files
# Controls how swift will supply parameters to the remote wrapper script.
# 'args' mode will pass parameters on the command line
# 'files' mode will pass parameters through an additional input file
#
# valid values: args, files
# Default: files
#
# wrapper.parameter.mode=args
# Determines whether Swift remote wrappers are invoked via an absolute path
# or a path relative to the job's initial working directory.
#
# valid values: absolute, relative
# wrapper.invocation.mode=absolute
#
# Limits the number of concurrent iterations that each foreach statement
# can have at one time. This conserves memory for swift programs that
# have large numbers of iterations (which would otherwise all be executed
# in parallel).
#
# Default: 1024
#
foreach.max.threads=10000
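#
# For example, a workflow containing a loop along the lines of
#   foreach i in [1:1000000] { ... }
# would otherwise try to start every iteration at once; with the value above,
# at most 10000 iterations of each foreach are in flight at a time.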
# Controls whether the log file will contain provenance information.
# Enabling this will increase the size of log files, sometimes
# significantly.
provenance.log=false
# Controls whether file staging is done by swift or by the execution
# provider. If set to false, the standard swift staging mechanism is
# used. If set to true, swift does not stage files. Instead, the
# execution provider is instructed to stage files in and out.
#
# Provider staging is experimental.
#
# When enabled, and when coasters are used as an execution provider,
# a staging mechanism can be selected for each site
# using the swift:stagingMethod site profile in sites.xml. The
# following is a list of accepted mechanisms:
#
# * file: Staging is done from a filesystem accessible to the
# coaster service (typically running on the head node)
# * proxy: Staging is done from a filesystem accessible to the
# client machine that swift is running on, and is proxied
# through the coaster service
# * sfs: (short for "shared filesystem") Staging is done by
# copying files to and from a filesystem accessible
# by the compute node (such as an NFS or GPFS mount).
use.provider.staging=false
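#
# When provider staging is enabled with coasters, a per-site mechanism could
# be selected in sites.xml with a profile entry along these lines (mirroring
# the profile syntax used by the generator above):
#
#   <profile namespace="swift" key="stagingMethod">file</profile>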