TerraRef BD Hyperspectral Extractor notes
config.py
# =============================================================================
#
# In order for this extractor to run according to your preferences,
# the following parameters need to be set.
#
# Some parameters can be left with the default values provided here - in that
# case it is important to verify that the default value is appropriate to
# your system. It is especially important to verify that paths to files and
# software applications are valid in your system.
#
# =============================================================================

import os

# Name to show in the RabbitMQ queue list.
extractorName = os.getenv('RABBITMQ_QUEUE', "terra.hyperspectral")

# URL used for connecting to RabbitMQ.
rabbitmqURL = os.getenv('RABBITMQ_URI', "amqp://guest:guest@localhost/%2f")

# Name of the RabbitMQ exchange.
rabbitmqExchange = os.getenv('RABBITMQ_EXCHANGE', "clowder")

# Type of messages to listen for.
messageType = "*.dataset.file.added"

# Trust certificates; set this to False for self-signed certificates.
sslVerify = os.getenv('RABBITMQ_SSLVERIFY', False)

# Location of terraref.sh.
workerScript = os.getenv('WORKER_SCRIPT', "terraref.sh")

# Workspace for input/output files.
inputDirectory = os.getenv('INPUTDIR', "./input")
outputDirectory = os.getenv('OUTPUTDIR', "./output")

# The extractor will only run when all of these files are present.
# These are filename postfixes used for file matching.
# A few other things depend on the `_raw` file.
requiredInputFiles = [
    '_raw',
    '_raw.hdr',
    '_image.jpg',
    '_metadata.json',
    '_frameIndex.txt',
    '_settings.txt'
]
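The postfixes are matched as filename suffixes. Below is a minimal sketch of that check, not from the gist, using hypothetical filenames (the real matching lives in get_all_files in terra.hyperspectral.py further down):

# Hypothetical filenames, only to illustrate the suffix matching.
from config import requiredInputFiles  # assumes config.py (above) is importable

sample_filenames = [
    'VNIR_2016_10_18_raw',
    'VNIR_2016_10_18_raw.hdr',
    'VNIR_2016_10_18_image.jpg',
    'VNIR_2016_10_18_metadata.json',
    'VNIR_2016_10_18_frameIndex.txt',
    'VNIR_2016_10_18_settings.txt',
]
# The extractor proceeds only when every required postfix is matched by some file.
ready = all(any(name.endswith(postfix) for name in sample_filenames)
            for postfix in requiredInputFiles)
print ready  # True once all six companion files are present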
Dockerfile
# Dockerfile for the TerraRef hyperspectral image conversion extractor
# August 17, 2016
FROM ubuntu:14.04
MAINTAINER Yan Y. Liu <[email protected]>

# install common libraries and python modules
USER root
RUN apt-get update
RUN apt-get upgrade -y -q
RUN apt-get install -y -q build-essential m4 swig antlr libantlr-dev udunits-bin libudunits2-dev unzip cmake wget git libjpeg-dev libpng-dev libtiff-dev
RUN apt-get install -y -q python-dev python-numpy python-pip python-virtualenv

# set up dirs for user-installed software
RUN useradd -m -s /bin/bash ubuntu
RUN mkdir /srv/downloads && chown -R ubuntu: /srv/downloads && \
    mkdir /srv/sw && chown -R ubuntu: /srv/sw
USER ubuntu

# set env vars for common libraries and python paths
ENV PYTHONPATH="/usr/lib/python2.7/dist-packages:${PYTHONPATH}"

## install from source
# hdf5
RUN cd /srv/downloads && \
    wget -q https://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8.17/src/hdf5-1.8.17.tar.gz && \
    tar xfz hdf5-1.8.17.tar.gz && \
    cd hdf5-1.8.17 && \
    ./configure --prefix=/srv/sw/hdf5-1.8.17 && \
    make && make install
ENV PATH="/srv/sw/hdf5-1.8.17/bin:${PATH}" \
    LD_LIBRARY_PATH="/srv/sw/hdf5-1.8.17/lib:${LD_LIBRARY_PATH}"

# netcdf4
RUN cd /srv/downloads && \
    wget -q ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4.4.1.tar.gz && \
    tar xfz netcdf-4.4.1.tar.gz && \
    cd netcdf-4.4.1 && \
    CFLAGS="-I/srv/sw/hdf5-1.8.17/include " LDFLAGS=" -L/srv/sw/hdf5-1.8.17/lib " LIBS=" -lhdf5 -lhdf5_hl " ./configure --prefix=/srv/sw/netcdf-4.4.1 --enable-netcdf4 && \
    make && make install
ENV PATH="/srv/sw/netcdf-4.4.1/bin:${PATH}" \
    LD_LIBRARY_PATH="/srv/sw/netcdf-4.4.1/lib:${LD_LIBRARY_PATH}"

# geos
RUN cd /srv/downloads && \
    wget -q http://download.osgeo.org/geos/geos-3.5.0.tar.bz2 && \
    tar xfj geos-3.5.0.tar.bz2 && \
    cd geos-3.5.0 && \
    ./configure --prefix=/srv/sw/geos --enable-python && \
    make && make install
ENV PATH="/srv/sw/geos/bin:${PATH}" \
    PYTHONPATH="/srv/sw/geos/lib/python2.7/site-packages:${PYTHONPATH}" \
    LD_LIBRARY_PATH="/srv/sw/geos/lib:${LD_LIBRARY_PATH}"

# proj4
RUN cd /srv/downloads && \
    wget -q https://github.com/OSGeo/proj.4/archive/4.9.2.tar.gz -O proj.4-4.9.2.tar.gz && \
    tar xfz proj.4-4.9.2.tar.gz && \
    cd proj.4-4.9.2 && \
    ./configure --prefix=/srv/sw/proj4 && \
    make && make install
ENV PATH="/srv/sw/proj4/bin:${PATH}" \
    LD_LIBRARY_PATH="/srv/sw/proj4/lib:${LD_LIBRARY_PATH}"

# gdal
RUN cd /srv/downloads && \
    wget -q http://download.osgeo.org/gdal/2.1.1/gdal-2.1.1.tar.gz && \
    tar xfz gdal-2.1.1.tar.gz && \
    cd gdal-2.1.1 && \
    ./configure --with-libtiff=internal --with-geotiff=internal --with-png=internal --with-jpeg=internal --with-gif=internal --without-curl --with-python --with-hdf5=/srv/sw/hdf5-1.8.17 --with-netcdf=/srv/sw/netcdf-4.4.1 --with-geos=/srv/sw/geos/bin/geos-config --with-threads --prefix=/srv/sw/gdal && \
    make && make install
ENV PATH="/srv/sw/gdal/bin:${PATH}" \
    PYTHONPATH="/srv/sw/gdal/lib/python2.7/site-packages:${PYTHONPATH}" \
    LD_LIBRARY_PATH="/srv/sw/gdal/lib:${LD_LIBRARY_PATH}"

# nco
RUN cd /srv/downloads && \
    wget -q https://github.com/nco/nco/archive/4.6.1.tar.gz -O nco-4.6.1.tar.gz && \
    tar xfz nco-4.6.1.tar.gz && \
    cd nco-4.6.1 && \
    ./configure NETCDF_ROOT=/srv/sw/netcdf-4.4.1 --prefix=/srv/sw/nco-4.6.1 --enable-ncap2 --enable-udunits2 && \
    make && make install
ENV PATH="/srv/sw/nco-4.6.1/bin:${PATH}" \
    LD_LIBRARY_PATH="/srv/sw/nco-4.6.1/lib:${LD_LIBRARY_PATH}"

ENV USERHOME="/home/ubuntu"
WORKDIR "${USERHOME}"

## install pyclowder
# install python modules
RUN cd ${USERHOME} && \
    virtualenv pyenv && \
    . pyenv/bin/activate && \
    pip install pika && \
    CC=gcc CXX=g++ USE_SETUPCFG=0 HDF5_INCDIR=/srv/sw/hdf5-1.8.17/include HDF5_LIBDIR=/srv/sw/hdf5-1.8.17/lib NETCDF4_INCDIR=/srv/sw/netcdf-4.4.1/include NETCDF4_LIBDIR=/srv/sw/netcdf-4.4.1/lib pip install netCDF4 && \
    pip install git+https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git@bugfix/CATS-554-add-pyclowder-support-for-dataset && \
    deactivate

## install hyperspectral image converter script
ENV PIPELINEDIR="${USERHOME}/computing-pipeline"
RUN git clone https://github.com/terraref/computing-pipeline.git "${PIPELINEDIR}"

## create workspace directories
ENV INPUTDIR="${USERHOME}/input" \
    OUTPUTDIR="${USERHOME}/output"
RUN mkdir -p "${INPUTDIR}" && \
    mkdir -p "${OUTPUTDIR}" && \
    mkdir -p "${USERHOME}/logs" && \
    mkdir -p "${USERHOME}/test-data"

## download test input data
RUN wget -q http://141.142.168.44/nfiedata/yanliu/terraref-hyperspectral-input-sample.tgz && \
    tar -xf terraref-hyperspectral-input-sample.tgz -C "./test-data" --strip-components 1

## install extractor
ENV RABBITMQ_URI="" \
    RABBITMQ_EXCHANGE="clowder" \
    RABBITMQ_VHOST="%2F" \
    RABBITMQ_QUEUE="terra.hyperspectral" \
    WORKER_SCRIPT="${PIPELINEDIR}/scripts/hyperspectral/terraref.sh"
COPY entrypoint.sh extractor_info.json config.py terra.hyperspectral.py ./
ENTRYPOINT ["./entrypoint.sh"]
CMD ["python", "./terra.hyperspectral.py"]
entrypoint.sh
#!/bin/bash
set -e

# If the RabbitMQ URI is not set, use the default credentials; while doing so,
# handle the linking scenario, where Docker sets RABBITMQ_PORT_5672_TCP_ADDR.
if [ "$RABBITMQ_URI" == "" ]; then
  if [ -n "$RABBITMQ_PORT_5672_TCP_ADDR" ]; then
    RABBITMQ_URI="amqp://guest:guest@${RABBITMQ_PORT_5672_TCP_ADDR}:${RABBITMQ_PORT_5672_TCP_PORT}/%2F"
  else
    RABBITMQ_URI="amqp://guest:guest@localhost:5672/%2F"
  fi
fi

. pyenv/bin/activate
printf "exec %s \n\n" "$@"
exec "$@"
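Not part of the gist: a minimal connectivity check, assuming pika is available in the pyenv virtualenv (the Dockerfile installs it), for confirming that the resolved RABBITMQ_URI is actually reachable before the extractor starts:

# check_rabbitmq.py - hypothetical helper, not part of the original gist.
import os
import pika

# Same default the entrypoint falls back to when no link or URI is provided.
uri = os.getenv('RABBITMQ_URI', 'amqp://guest:guest@localhost:5672/%2F')
connection = pika.BlockingConnection(pika.URLParameters(uri))
print 'connected to %s' % uri
connection.close()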
terra.hyperspectral.py
#!/usr/bin/env python
import os
import subprocess
import logging

from config import *
import pyclowder.extractors as extractors


def main():
    global extractorName, messageType, rabbitmqExchange, rabbitmqURL

    # Set up logging.
    logging.basicConfig(format='%(levelname)-7s : %(name)s - %(message)s', level=logging.WARN)
    logging.getLogger('pyclowder.extractors').setLevel(logging.INFO)

    # Connect to RabbitMQ.
    extractors.connect_message_bus(
        extractorName = extractorName,
        messageType = messageType,
        rabbitmqExchange = rabbitmqExchange,
        rabbitmqURL = rabbitmqURL,
        processFileFunction = process_dataset,
        checkMessageFunction = check_message
    )


# ----------------------------------------------------------------------
# Check for the expected input files before beginning processing.
def check_message(parameters):
    if has_all_files(parameters):
        if has_output_file(parameters):
            print 'skipping, output file already exists'
            return False
        else:
            # Handle the message but do not download any files automatically.
            return "bypass"
    else:
        print 'skipping, not all input files are ready'
        return False


# ----------------------------------------------------------------------
# Process the dataset message and upload the results.
def process_dataset(parameters):
    global extractorName, workerScript, inputDirectory, outputDirectory

    # Find input files in the dataset.
    files = get_all_files(parameters)

    # Download files to the input directory.
    for fileExt in files:
        files[fileExt]['path'] = extractors.download_file(
            channel = parameters['channel'],
            header = parameters['header'],
            host = parameters['host'],
            key = parameters['secretKey'],
            fileid = files[fileExt]['id'],
            # What's this argument for?
            intermediatefileid = files[fileExt]['id'],
            ext = fileExt
        )
        # Restore temp filenames to the originals - the script requires specific
        # name formatting, so tmp names aren't suitable.
        files[fileExt]['old_path'] = files[fileExt]['path']
        files[fileExt]['path'] = os.path.join(inputDirectory, files[fileExt]['filename'])
        os.rename(files[fileExt]['old_path'], files[fileExt]['path'])
        print 'found %s file: %s' % (fileExt, files[fileExt]['path'])

    # Invoke terraref.sh.
    outFilePath = os.path.join(outputDirectory, get_output_filename(files['_raw']['filename']))
    print 'invoking terraref.sh to create: %s' % outFilePath
    subprocess.call(["bash", workerScript, "-d", "1", "-I", inputDirectory, "-O", outputDirectory])
    print 'done creating output file'

    # Verify the output file exists and upload it to Clowder.
    if os.path.exists(outFilePath):
        print 'uploading output file...'
        extractors.upload_file_to_dataset(filepath=outFilePath, parameters=parameters)
        print 'done uploading'

    print 'cleaning up...'
    # Clean up the input files.
    for fileExt in files:
        os.remove(files[fileExt]['path'])
    # Clean up the output file.
    os.remove(outFilePath)
    print 'done cleaning'


# ----------------------------------------------------------------------
# Find as many expected files as possible and return the set.
def get_all_files(parameters):
    files = {
        '_raw': None,
        '_raw.hdr': None,
        '_image.jpg': None,
        '_metadata.json': None,
        '_frameIndex.txt': None,
        '_settings.txt': None
    }
    if 'filelist' in parameters:
        for fileItem in parameters['filelist']:
            fileId = fileItem['id']
            fileName = fileItem['filename']
            for fileExt in files:
                if fileName[-len(fileExt):] == fileExt:
                    files[fileExt] = {
                        'id': fileId,
                        'filename': fileName
                    }
    return files


# ----------------------------------------------------------------------
# Returns the output filename.
def get_output_filename(raw_filename):
    return '%s.nc' % raw_filename[:-len('_raw')]


# ----------------------------------------------------------------------
# Returns True if all expected files are found.
def has_all_files(parameters):
    files = get_all_files(parameters)
    allFilesFound = True
    for fileExt in files:
        if files[fileExt] is None:
            allFilesFound = False
    return allFilesFound


# ----------------------------------------------------------------------
# Returns True if the output file is present.
def has_output_file(parameters):
    if 'filelist' not in parameters:
        return False
    if not has_all_files(parameters):
        return False
    files = get_all_files(parameters)
    outFilename = get_output_filename(files['_raw']['filename'])
    outFileFound = False
    for fileItem in parameters['filelist']:
        if outFilename == fileItem['filename']:
            outFileFound = True
            break
    return outFileFound


if __name__ == "__main__":
    main()
Sample Message from *.file.text.#
{
"channel": <pika.adapters.blocking_connection.BlockingChannel object at 0x7f22eefa97d0>,
"filename": "foo.txt",
"secretKey": "r1ek3rs",
"header": <BasicProperties(['content_type=application\\json', 'correlation_id=64217f3b-d6eb-440a-bcb6-79b7bd4bc64f', 'reply_to=amq.gen-zFP9E0oHDjP0fuzjCwhwlg'])>,
"host": "http://10.211.55.9:9000",
"flags": "",
"fileSize": "9",
"intermediateId": "5789532de4b0049e1326d4ab",
"inputfile": "/tmp/tmpuoraoM.txt",
"id": "5789532de4b0049e1326d4ab",
"datasetId": "57868281e4b0049e260eb382",
"fileid": "5789532de4b0049e1326d4ab"
}
Sample Message from *.dataset.file.added
{
"files": [
"/tmp/tmpLBmo5d/_info.json",
"/tmp/tmpLBmo5d/_dataset_metadata.json",
"/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/_info.json",
"/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/foo.txt",
"/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/_metadata.json"
],
"channel": <pika.adapters.blocking_connection.BlockingChannel object at 0x7f011ea2f0d0>,
"filelist": [
{
"filename": "foo.txt",
"date-created": "Mon Jul 18 21:16:27 UTC 2016",
"contentType": "text/plain",
"id": "578d472be4b0049e1326efda",
"size": "9"
}
],
"method": <Basic.Deliver(['consumer_tag=ctag1.5091d4c2a2534709b9c6d5865820fde3', 'delivery_tag=1', 'exchange=clowder', 'redelivered=False', 'routing_key=clowder.dataset.file.added'])>,
"secretKey": "r1ek3rs",
"header": <BasicProperties(['content_type=application\\json', 'correlation_id=efaa53d0-1e4a-4dfc-a947-6a420b1ee079', 'reply_to=amq.gen-zFP9E0oHDjP0fuzjCwhwlg'])>,
"host": "http://10.211.55.9:9000",
"flags": "",
"fileSize": "9",
"intermediateId": "578d472be4b0049e1326efda",
"datasetInfo": {
"description": "",
"created": "Wed Jul 13 18:03:45 UTC 2016",
"id": "57868281e4b0049e260eb382",
"authorId": "57866660292acbb6539f5e85",
"thumbnail": "None",
"name": "Hello World"
},
"filename": "foo.txt",
"id": "578d472be4b0049e1326efda",
"datasetId": "57868281e4b0049e260eb382",
"fileid": "578d472be4b0049e1326efda"
}
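Only the filelist entries matter to the pre-flight helpers, so they can be exercised against a message shaped like the sample above. A rough sketch, assuming it runs where config.py and pyclowder are importable (e.g., inside the container); the ids and filenames are made up:

# Offline exercise of the message pre-checks; ids and filenames are hypothetical.
import imp
# The extractor file has dots in its name, so load it by path instead of importing.
extractor = imp.load_source('extractor', 'terra.hyperspectral.py')

parameters = {
    'filelist': [
        {'id': '%06d' % i, 'filename': 'VNIR_2016_10_18' + postfix}
        for i, postfix in enumerate([
            '_raw', '_raw.hdr', '_image.jpg',
            '_metadata.json', '_frameIndex.txt', '_settings.txt'])
    ]
}
print extractor.has_all_files(parameters)    # True: every required postfix is present
print extractor.has_output_file(parameters)  # False: VNIR_2016_10_18.nc is not in the dataset yet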
After changing config.py:
- Delete the existing queue.
- Also update the Dockerfile.
Install PyClowder that supports dataset operations:
git clone https://opensource.ncsa.illinois.edu/bitbucket/scm/cats/pyclowder.git
cd pyclowder
git checkout bugfix/CATS-554-add-pyclowder-support-for-dataset
python setup.py install
Apparently, when using ENV, $HOME is null.
Note: building the Dockerfile took about 18 minutes on my testing machine.
Met Data
files:
[
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_29_2304.dat",
"old_path":"/tmp/tmpFwy7iN.dat",
"id":"57f6a5fae4b0c6c9a37b7fef",
"filename":"WeatherStation_SecData_2016_08_29_2304.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0006.dat",
"old_path":"/tmp/tmpxQDhnf.dat",
"id":"57f6a5fce4b0c6c9a37b7ff8",
"filename":"WeatherStation_SecData_2016_08_30_0006.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0108.dat",
"old_path":"/tmp/tmpcU8zMw.dat",
"id":"57f6a5fce4b0c6c9a37b7ffd",
"filename":"WeatherStation_SecData_2016_08_30_0108.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0210.dat",
"old_path":"/tmp/tmpGa0s2T.dat",
"id":"57f6a5fce4b0c6c9a37b8002",
"filename":"WeatherStation_SecData_2016_08_30_0210.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0618.dat",
"old_path":"/tmp/tmpHqNAWF.dat",
"id":"57f6a5fde4b0c6c9a37b8008",
"filename":"WeatherStation_SecData_2016_08_30_0618.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0516.dat",
"old_path":"/tmp/tmpZYwMBV.dat",
"id":"57f6a5fde4b0c6c9a37b8011",
"filename":"WeatherStation_SecData_2016_08_30_0516.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0720.dat",
"old_path":"/tmp/tmpscd5hm.dat",
"id":"57f6a5fde4b0c6c9a37b800d",
"filename":"WeatherStation_SecData_2016_08_30_0720.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0414.dat",
"old_path":"/tmp/tmp2HWOGQ.dat",
"id":"57f6a5fde4b0c6c9a37b8017",
"filename":"WeatherStation_SecData_2016_08_30_0414.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0312.dat",
"old_path":"/tmp/tmpQ3Pxd8.dat",
"id":"57f6a5fde4b0c6c9a37b8019",
"filename":"WeatherStation_SecData_2016_08_30_0312.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1230.dat",
"old_path":"/tmp/tmp9AHe6C.dat",
"id":"57f6a5fde4b0c6c9a37b8025",
"filename":"WeatherStation_SecData_2016_08_30_1230.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0924.dat",
"old_path":"/tmp/tmpfeq0ZQ.dat",
"id":"57f6a5fde4b0c6c9a37b8028",
"filename":"WeatherStation_SecData_2016_08_30_0924.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1128.dat",
"old_path":"/tmp/tmpT17Fee.dat",
"id":"57f6a5fde4b0c6c9a37b8035",
"filename":"WeatherStation_SecData_2016_08_30_1128.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0822.dat",
"old_path":"/tmp/tmpl99oma.dat",
"id":"57f6a5fde4b0c6c9a37b803b",
"filename":"WeatherStation_SecData_2016_08_30_0822.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1026.dat",
"old_path":"/tmp/tmpPvX6Ih.dat",
"id":"57f6a5fde4b0c6c9a37b8031",
"filename":"WeatherStation_SecData_2016_08_30_1026.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1332.dat",
"old_path":"/tmp/tmpFl7FFE.dat",
"id":"57f6a5fde4b0c6c9a37b8044",
"filename":"WeatherStation_SecData_2016_08_30_1332.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1740.dat",
"old_path":"/tmp/tmpT40sOC.dat",
"id":"57f6a5fee4b0c6c9a37b804e",
"filename":"WeatherStation_SecData_2016_08_30_1740.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1842.dat",
"old_path":"/tmp/tmpj3iu63.dat",
"id":"57f6a5fee4b0c6c9a37b8051",
"filename":"WeatherStation_SecData_2016_08_30_1842.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1434.dat",
"old_path":"/tmp/tmpNdY2bB.dat",
"id":"57f6a5fee4b0c6c9a37b804b",
"filename":"WeatherStation_SecData_2016_08_30_1434.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1536.dat",
"old_path":"/tmp/tmpuefrk3.dat",
"id":"57f6a5fee4b0c6c9a37b8053",
"filename":"WeatherStation_SecData_2016_08_30_1536.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1944.dat",
"old_path":"/tmp/tmpM6AegX.dat",
"id":"57f6a5fee4b0c6c9a37b805a",
"filename":"WeatherStation_SecData_2016_08_30_1944.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1638.dat",
"old_path":"/tmp/tmp22i41X.dat",
"id":"57f6a5fee4b0c6c9a37b8065",
"filename":"WeatherStation_SecData_2016_08_30_1638.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2046.dat",
"old_path":"/tmp/tmpjhjmdO.dat",
"id":"57f6a5fee4b0c6c9a37b806e",
"filename":"WeatherStation_SecData_2016_08_30_2046.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2148.dat",
"old_path":"/tmp/tmpmcobZ9.dat",
"id":"57f6a5fee4b0c6c9a37b8073",
"filename":"WeatherStation_SecData_2016_08_30_2148.dat"
},
{
"path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2250.dat",
"old_path":"/tmp/tmpzVc1tt.dat",
"id":"57f6a5fee4b0c6c9a37b8076",
"filename":"WeatherStation_SecData_2016_08_30_2250.dat"
}
]
Race issue
For example, if the last 2 of the 24 required files are uploaded at the same time, the extractor will run twice, each time detecting all 24 files (a lock-file guard is sketched after the notes below).
- Find the sensor ID and hardcode it.
- Search the stream by name (not sure if it works): streams?stream_name=...
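Not part of the gist: a minimal sketch of a lock-file guard against the double-run, assuming acquire_lock would be called at the top of process_dataset (and the .lock file removed during cleanup):

# Hypothetical guard against double-processing; not part of the original extractor.
import errno
import os

def acquire_lock(outFilePath):
    # Atomically create <output>.lock; return False if another run already holds it.
    lockPath = outFilePath + '.lock'
    try:
        fd = os.open(lockPath, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        os.close(fd)
        return True
    except OSError as e:
        if e.errno == errno.EEXIST:
            return False
        raise

This only helps when the competing runs share the same output directory (i.e., a single container); across containers a shared volume or a Clowder-side check (like has_output_file) would still be needed.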
Start up the extractor runtime environment
docker-compose up -d
Build extractor
docker build -t <image_name> .
docker build -t terra_hyperspectral .
Force rebuild
docker build --no-cache -t <image_name> .
docker build --no-cache -t terra_hyperspectral .
Run extractor
docker run --rm -i -t --name terra_hyperspectral_1 --link bdextractorstemplate_rabbitmq_1:rabbitmq terra_hyperspectral
Get shell of the running extractor
docker exec -i -t terra_hyperspectral_1 /bin/bash
Test command
/home/ubuntu/computing-pipeline/scripts/hyperspectral/terraref.sh -d 1 -I /home/ubuntu/test-data -O /home/ubuntu/output
Other helper scripts