Skip to content

Instantly share code, notes, and snippets.

@corburn
Last active August 29, 2015 14:12
Show Gist options
  • Save corburn/689ceccaa0cb3c610e86 to your computer and use it in GitHub Desktop.
Save corburn/689ceccaa0cb3c610e86 to your computer and use it in GitHub Desktop.
Schedules a vcf_to_matrix job and diff the output against a reference folder
#!/bin/bash
#
# Schedules a vcf_to_matrix job and diff the output against a reference folder
#
# The REPO_PATH and VCF_TO_MATRIX variables must be set by the user
#
# REFERENCE_PATH is an output folder from the nasp program. The vcf_to_matrix
# program will read its files and write its output to a folder
# Name of this script file, shown in the help text.
# The expansion is quoted so a script path containing whitespace is handled.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")
# Terminal formatting sequences used by the messages below.
# When tput cannot produce a sequence (e.g. TERM is unset or unknown) its error
# is discarded and the variable stays empty, so messages are printed unformatted.
NORM=$(tput sgr0 2> /dev/null)
BOLD=$(tput bold 2> /dev/null)
REV=$(tput smso 2> /dev/null)
# Profiler defaults: no profiler is used unless one is selected with -p.
PROFILE_TAG=""
PROFILE_CMD=""
# Path to the git repository the VCF_TO_MATRIX executable was compiled from.
# It will be used to tag the output folder with a commit hash suffix.
# A REPO_PATH value already present in the environment takes precedence over
# the default; the -g switch overrides both.
REPO_PATH="${REPO_PATH:-$HOME/NASP}"
# Absolute path to the vcf_to_matrix executable.
# A VCF_TO_MATRIX value already present in the environment takes precedence
# over the default below.
VCF_TO_MATRIX="${VCF_TO_MATRIX:-/home/jtravis/.local/bin/vcf_to_matrix}"
# Print the usage text to stdout and terminate the script with exit status 1.
# Globals read: SCRIPT, BOLD, NORM, REV, REPO_PATH
# Arguments:    none
# Never returns: every caller relies on the exit to stop further processing.
function HELP {
  # printf '%s' is used so backslashes inside the interpolated values
  # (script name, repository path) are printed literally.
  printf '\nHelp documentation for %s\n\n' "${BOLD}${SCRIPT}.${NORM}"
  printf '%s\n\n' "${REV}Basic usage:${NORM} ${BOLD}${SCRIPT} path/to/reference/output${NORM}"
  printf '%s\n' "Command line switches are optional. The following switches are recognized."
  printf '%s\n' "${REV}-p${NORM} --Profile the code with: kernprof, memory_profiler, or mprof"
  printf '%s\n' "${REV}-i${NORM} --Set a unique identifier for the job and output. Default is the current repository revision or 'nullref'."
  printf '%s\n' "${REV}-g${NORM} --Set the path to the git repository. Default is ${BOLD}${REPO_PATH}${NORM}."
  printf '%s\n\n' "${REV}-h${NORM} --Displays this help message. No further functions are performed."
  # The example only uses switches this script actually accepts.
  printf '%s\n\n' "Example: ${BOLD}${SCRIPT} -p mprof -i my_run path/to/reference/output${NORM}"
  exit 1
}
# Without any command-line arguments there is nothing to act on:
# show the usage text (HELP terminates the script).
[ "$#" -ne 0 ] || HELP
# Translate a profiler name into the command prefix used to launch vcf_to_matrix.
# Arguments:       $1 - profiler name: kernprof, memory_profiler, or mprof
# Globals written: PROFILE_TAG (the name), PROFILE_CMD (the command prefix)
# An unknown name prints an error followed by the usage text (HELP exits).
# Assumes the profilers are installed and the functions of interest have the
# @profile decorator.
select_profiler() {
  PROFILE_TAG="$1"
  case "$PROFILE_TAG" in
    kernprof)
      # Profile the command execution time on a per-line basis.
      #   -o  file the profile is written to
      #   -l  line-by-line profiling
      # NOTE: the flags are parsed before OUTPUT_FOLDER is assigned further
      # down in this script, so $OUTPUT_FOLDER expands here to whatever it
      # holds at parse time (normally nothing).
      PROFILE_CMD="kernprof -o $OUTPUT_FOLDER/vcf_to_matrix.lprof -l"
      ;;
    memory_profiler)
      # Profile the command memory allocation on a per-line basis.
      PROFILE_CMD="python3 -m memory_profiler"
      ;;
    mprof)
      # Profile the command memory allocation as a plottable function over time.
      #   --python            enable function timestamps
      #   --include-children  monitor forked processes as well
      PROFILE_CMD="mprof run --include-children --python"
      ;;
    *)
      printf '\nUnknown profiler %s\n' "'${BOLD}${PROFILE_TAG}${NORM}'"
      HELP
      ;;
  esac
}

# Parse the command-line switches -p, -i, -g and -h.
# Arguments:       the script's arguments ("$@")
# Globals written: PROFILE_TAG, PROFILE_CMD, SHA1, REPO_PATH, OPTIND
# OPTIND is deliberately left global so the caller can shift the parsed
# switches away afterwards.
parse_flags() {
  local flag
  # The leading ':' selects getopts' silent mode: an unknown switch is
  # reported as '?', a switch that lacks its argument as ':'.
  while getopts ':p:i:g:h' flag; do
    case "$flag" in
      p) # Set profiler.
        select_profiler "$OPTARG"
        ;;
      i) # Set unique job and output identifier.
        SHA1=$OPTARG
        ;;
      g) # Set path to git repository (resolved to an absolute path).
        if ! REPO_PATH=$(readlink -e -- "$OPTARG") || [ ! -d "$REPO_PATH" ]; then
          echo "Directory does not exist: $OPTARG"
          exit 1
        fi
        # TODO: Check if repository exists
        #elif ! command -v git > /dev/null; then
        #elif [ "`cd $REPO_PATH && git rev-parse --is-inside-work-tree`" == "true" ]; then
        ;;
      h) # Show help
        HELP
        ;;
      :) # A switch was given without its required argument.
        printf '\nOption -%s requires an argument.\n' "${BOLD}${OPTARG}${NORM}"
        HELP
        ;;
      \?) # Unrecognized option - show help
        printf '\nOption -%s not allowed.\n' "${BOLD}${OPTARG}${NORM}"
        HELP
        ;;
    esac
  done
}

parse_flags "$@"
# Discard the parsed flags so the remaining positional arguments can be
# addressed as $1, $2, etc. instead of relative to the flags.
shift $((OPTIND - 1))
# Refuse to continue unless the configured vcf_to_matrix can be executed.
[ -x "$VCF_TO_MATRIX" ] || {
  echo -e "\\n${REV}ERROR:${NORM} The VCF_TO_MATRIX is not executable. You must set the variable in the script file."
  echo "It is assumed VCF_TO_MATRIX is a development version different from the globally installed version."
  exit 1
}
# Print the abbreviated hash of the commit checked out in a git work tree.
# Arguments: $1 - path to the repository
# Outputs:   the abbreviated hash on stdout
# Returns:   non-zero when $1 is not a directory, git is not installed, the
#            directory is not inside a work tree, or HEAD cannot be resolved
repo_revision() {
  local repo=$1
  [ -d "$repo" ] || return 1
  command -v git > /dev/null || return 1
  # git's own diagnostics are discarded; the caller prints a single warning.
  [ "$(cd -- "$repo" && git rev-parse --is-inside-work-tree 2> /dev/null)" = "true" ] || return 1
  (cd -- "$repo" && git rev-parse --short HEAD 2> /dev/null)
}

# If a unique identifier was not given on the commandline, use the current
# revision from the REPO_PATH, or fall back to 'nullref' when the revision
# cannot be determined (see repo_revision for the failure cases).
if [ -z "$SHA1" ]; then
  if ! SHA1=$(repo_revision "$REPO_PATH") || [ -z "$SHA1" ]; then
    echo "WARNING: failed to determine git hash"
    SHA1="nullref"
  fi
fi
# Resolve the reference folder (first positional argument) and the
# matrix_dto.xml inside it to absolute paths. readlink -e fails unless every
# path component exists, so this doubles as the existence check. The
# arguments are quoted so paths containing whitespace, or a missing argument,
# are handled as a single operand.
if ! REFERENCE_FOLDER=$(readlink -e -- "$1"); then
  printf '\n%s Failed to determine absolute path to the reference folder: %s\n' "${REV}ERROR:${NORM}" "$1"
  HELP
fi
if ! REFERENCE_DTO_FILE=$(readlink -e -- "$REFERENCE_FOLDER/matrix_dto.xml"); then
  printf '\n%s Failed to determine absolute path to reference matrix_dto.xml\n' "${REV}ERROR${NORM}:"
  HELP
fi
# Print the first candidate path that is not an existing directory.
# Arguments: $1 - preferred folder path
# Outputs:   $1 itself when it does not exist yet, otherwise the first of
#            $1.0, $1.1, $1.2, ... that does not exist
first_free_folder() {
  local base=$1
  local n=0
  if [ ! -d "$base" ]; then
    printf '%s\n' "$base"
    return 0
  fi
  while [ -d "$base.$n" ]; do
    n=$((n + 1))
  done
  printf '%s\n' "$base.$n"
}

# The first argument, $1, is a path to the reference folder. The output folder
# is named after the reference folder with the identifier and the profiler tag
# appended as suffixes (the name ends in a '.' when no profiler is selected,
# because PROFILE_TAG is then empty). Shell parameter expansion is used here to
# trim the trailing slash from the reference folder path.
# see http://www.gnu.org/software/bash/manual/bashref.html#Shell-Parameter-Expansion
# If that folder already exists, a numeric suffix is appended to find a free name.
OUTPUT_FOLDER=$(first_free_folder "$(readlink -f -- "${1%/}.$SHA1.${PROFILE_TAG}")")
OUTPUT_DTO_FILE=$OUTPUT_FOLDER/matrix_dto.xml
# Create the output folder with its matrices and statistics subfolders.
# An empty OUTPUT_FOLDER is rejected up front: it would otherwise make mkdir
# target /matrices and /statistics. The paths are quoted so folder names
# containing whitespace are created as intended.
if [ -z "$OUTPUT_FOLDER" ] || ! mkdir -pv -- "$OUTPUT_FOLDER/matrices" "$OUTPUT_FOLDER/statistics"; then
  echo -e "\\n${REV}ERROR:${NORM} Failed to create the output folders"
  exit 1
fi
# Verify the assumptions of the sed expression below BEFORE writing anything:
# the reference DTO must contain a stats-folder element whose path ends in
# /statistics and a matrix-folder element whose path ends in /matrices.
# Otherwise sed would leave the reference paths untouched in the copy.
# http://stackoverflow.com/questions/15965073/return-code-of-sed-for-no-match
if ! grep -q -e "<stats-folder>.*/statistics</stats-folder>" -- "$REFERENCE_DTO_FILE" ||
  ! grep -q -e "<matrix-folder>.*/matrices</matrix-folder>" -- "$REFERENCE_DTO_FILE"; then
  echo -e "\\n${REV}ERROR:${NORM} The reference DTO file does not contain a stats-folder and/or matrix-folder element. This script assumes they exist."
  exit 1
fi
# Escape the characters that are special in the replacement part of the sed
# command below: the ':' delimiter, '&' (whole match) and the backslash.
sed_safe_folder=$(printf '%s' "$OUTPUT_FOLDER" | sed -e 's/[\\&:]/\\&/g')
# Replace matrices and statistics paths in matrix_dto.xml with the
# $OUTPUT_FOLDER/{matrices,statistics} paths and write the result to the
# output DTO file.
if ! sed -E 's:(<stats-folder>|<matrix-folder>)(.*)/(statistics|matrices)(</stats-folder>|</matrix-folder>):\1'"$sed_safe_folder"'/\3\4:g' -- "$REFERENCE_DTO_FILE" > "$OUTPUT_DTO_FILE"; then
  echo -e "\\n${REV}ERROR:${NORM} Failed to write $OUTPUT_DTO_FILE"
  exit 1
fi
# The kernprof command embeds the output folder, which is only assigned after
# the command-line flags have been parsed. Build that command here, where
# OUTPUT_FOLDER is known, so the profile is written into the output folder.
if [ "$PROFILE_TAG" = "kernprof" ]; then
  PROFILE_CMD="kernprof -o $(printf '%q' "$OUTPUT_FOLDER/vcf_to_matrix.lprof") -l"
fi
# The command line is handed to qsub as the text of the job script, so the
# paths are shell-quoted with printf %q; paths without special characters are
# left unchanged by the quoting.
cmd="$PROFILE_CMD $(printf '%q' "$VCF_TO_MATRIX") --mode xml --num-threads 15 --dto-file $(printf '%q' "$OUTPUT_DTO_FILE")"
echo "$cmd"
# Submit the job; qsub's output is cut at the first '.' and kept in JOB_ID.
JOB_ID=$(printf '%s\n' "$cmd" | qsub -N "nasp_matrix.${SHA1}.${PROFILE_TAG}" -l walltime=12:00:00 -o "$OUTPUT_FOLDER/stdout.log" -e "$OUTPUT_FOLDER/stderr.log" - | cut -d . -f 1)
# An empty JOB_ID means qsub printed nothing on stdout, i.e. no job was queued.
if [ -z "$JOB_ID" ]; then
  echo -e "\\n${REV}ERROR:${NORM} Failed to submit the job with qsub"
  exit 1
fi
#$cmd
## Compare the output against reference output
#if [ -e /home/jtravis/.bashrc ]; then
# echo "dref job:"
# # Load the dref function from .bashrc and use it to compare the vcf_to_matrix output against a reference folder
# echo "source /home/jtravis/.bashrc && fail_vcf_to_matrix $OUTPUT_FOLDER" | qsub -N "dref.${JOB_ID}" -W depend=afterok:${JOB_ID} -o /dev/null -e /dev/null -
#fi
#watch qstat
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment