Last active
August 29, 2015 14:12
-
-
Save corburn/689ceccaa0cb3c610e86 to your computer and use it in GitHub Desktop.
Schedules a vcf_to_matrix job and diff the output against a reference folder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Schedules a vcf_to_matrix job and diffs the output against a reference folder
#
# The REPO_PATH and VCF_TO_MATRIX variables must be set by the user
#
# The reference folder (first positional argument) is an output folder from the
# nasp program. The vcf_to_matrix program will read its files and write its
# output to a new folder named after the reference folder.
# Name of this script, shown in the help text.
# BASH_SOURCE is quoted so a script path containing spaces is passed to
# basename as a single operand.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")

# Terminal formatting sequences used by the help and error messages.
# tput fails when no usable terminal description is available (for example
# when TERM is unset); its error output is intentionally discarded and the
# variable is left empty so messages are simply printed unformatted.
NORM=$(tput sgr0 2>/dev/null) || NORM=""
BOLD=$(tput bold 2>/dev/null) || BOLD=""
REV=$(tput smso 2>/dev/null) || REV=""

# Initialize variables to default values.
PROFILE_TAG=""
PROFILE_CMD=""

# Path to the git repository the VCF_TO_MATRIX executable was compiled from
# It will be used to tag the output folder with a commit hash suffix
REPO_PATH="$HOME/NASP"

# Absolute path to the vcf_to_matrix executable
VCF_TO_MATRIX="/home/jtravis/.local/bin/vcf_to_matrix"
# Help function
#
# Prints the usage text for this script and terminates the script.
# Globals:   SCRIPT, BOLD, NORM, REV, REPO_PATH (read)
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the shell with status 1
HELP() {
  printf '\n%s\n\n' "Help documentation for ${BOLD}${SCRIPT}.${NORM}"
  printf '%s\n\n' "${REV}Basic usage:${NORM} ${BOLD}$SCRIPT path/to/reference/output${NORM}"
  printf '%s\n' "Command line switches are optional. The following switches are recognized."
  printf '%s\n' "${REV}-p${NORM} --Profile the code with: kernprof, memory_profiler, or mprof"
  printf '%s\n' "${REV}-i${NORM} --Set a unique identifier for the job and output. Default is the current repository revision or 'nullref'."
  printf '%s\n' "${REV}-g${NORM} --Set the path to the git repository. Default is ${BOLD}${REPO_PATH}${NORM}."
  printf '%s\n\n' "${REV}-h${NORM} --Displays this help message. No further functions are performed."
  # The example only uses switches this script actually accepts (-p, -i, -g).
  printf '%s\n\n' "Example: ${BOLD}$SCRIPT -p mprof -i my_run -g path/to/repo path/to/reference/output${NORM}"
  exit 1
}
# Invoked without any arguments: show the help text (HELP also exits).
case $# in
  0) HELP ;;
esac
# Parse commandline flags
#
# The leading ':' in the option string selects getopts' silent error mode:
# an unknown option yields FLAG='?' and an option that is missing its
# argument yields FLAG=':'. In both cases OPTARG holds the option letter.
while getopts ':p:i:g:h' FLAG; do
  case "$FLAG" in
    p) # Set profiler. Assumes the profilers are installed and the functions of interest have the @profile decorator.
      PROFILE_TAG="$OPTARG"
      case "$PROFILE_TAG" in
        kernprof)
          # Profile the command execution time on a per-line basis
          #   -o file the profile is written to
          #   -l line-by-line profiling
          # NOTE(review): OUTPUT_FOLDER is only assigned after the flags have
          # been parsed, so unless it is inherited from the environment it
          # expands to an empty string in this assignment.
          PROFILE_CMD="kernprof -o $OUTPUT_FOLDER/vcf_to_matrix.lprof -l"
          ;;
        memory_profiler)
          # Profile the command memory allocation on a per-line basis
          PROFILE_CMD="python3 -m memory_profiler"
          ;;
        mprof)
          # Profile the command memory allocation as a plottable function over time
          #   --python: Enable function timestamps
          #   --include-children: Monitor forked processes as well
          PROFILE_CMD="mprof run --include-children --python"
          ;;
        *)
          echo -e "\\nUnknown profiler '${BOLD}$PROFILE_TAG${NORM}'"
          HELP
          ;;
      esac
      ;;
    i) # Set unique job and output identifier.
      SHA1="$OPTARG"
      ;;
    g) # Set path to git repository
      # readlink -e fails unless every component of the path exists. The
      # argument is quoted so paths containing spaces are handled.
      if ! REPO_PATH=$(readlink -e -- "$OPTARG"); then
        echo "Directory does not exist: $OPTARG"
        exit 1
      fi
      # TODO: Check if repository exists
      #elif ! command -v git > /dev/null; then
      #elif [ "`cd $REPO_PATH && git rev-parse --is-inside-work-tree`" == "true" ]; then
      ;;
    h) # Show help
      HELP
      ;;
    :) # Option given without its required argument - show help
      echo -e \\n"Option -${BOLD}$OPTARG${NORM} requires an argument."
      HELP
      ;;
    \?) # Unrecognized option - show help
      echo -e \\n"Option -${BOLD}$OPTARG${NORM} not allowed."
      HELP
      ;;
  esac
done
# Discard the flags consumed by getopts so the remaining positional arguments
# can be addressed as $1, $2, etc. instead of relative to the flags.
shift "$((OPTIND - 1))"

# Stop early unless VCF_TO_MATRIX names an executable file.
[[ -x "$VCF_TO_MATRIX" ]] || {
  echo -e "\\n${REV}ERROR:${NORM} The VCF_TO_MATRIX is not executable. You must set the variable in the script file."
  echo "It is assumed VCF_TO_MATRIX is a development version different from the globally installed version."
  exit 1
}
# If a unique identifier was not given on the commandline, use the current
# revision from the REPO_PATH or fall back to 'nullref' if git is missing,
# the repository is missing, or no revision could be read from it.
if [[ -z "$SHA1" ]]; then
  # REPO_PATH is quoted so repository paths containing spaces work. Each git
  # call runs inside a command substitution, so the working directory of this
  # script is not changed by the cd.
  if [[ -d "$REPO_PATH" ]] \
    && command -v git > /dev/null \
    && [[ "$(cd "$REPO_PATH" && git rev-parse --is-inside-work-tree)" == "true" ]]; then
    SHA1=$(cd "$REPO_PATH" && git rev-parse --short HEAD)
  fi
  # Also reached when rev-parse printed nothing (e.g. a repository without
  # commits), so SHA1 is never left empty.
  if [[ -z "$SHA1" ]]; then
    echo "WARNING: failed to determine git hash"
    SHA1="nullref"
  fi
fi
# Resolve the reference folder (first positional argument) and the
# matrix_dto.xml inside it to absolute paths. readlink -e fails when the path
# does not exist, in which case an error and the help text are shown (HELP
# exits). "$1" is quoted so paths containing spaces are not split into
# several operands.
if ! REFERENCE_FOLDER=$(readlink -e -- "$1"); then
  echo -e "\\n${REV}ERROR:${NORM} Failed to determine absolute path to the reference folder: $1"
  HELP
fi
if ! REFERENCE_DTO_FILE=$(readlink -e -- "$1/matrix_dto.xml"); then
  echo -e "\\n${REV}ERROR${NORM}: Failed to determine absolute path to reference matrix_dto.xml"
  HELP
fi
# The first argument, $1, is a path to the reference folder. The output folder
# is named after the reference folder with the job identifier (SHA1) and the
# profiler tag appended. Shell parameter expansion is used here to trim the
# trailing slash from the reference folder path.
# see http://www.gnu.org/software/bash/manual/bashref.html#Shell-Parameter-Expansion
OUTPUT_FOLDER=$(readlink -f -- "${1%/}.${SHA1}.${PROFILE_TAG}")
if [[ -z "$OUTPUT_FOLDER" ]]; then
  # Without this guard the mkdir below would target /matrices and /statistics.
  echo -e "\\n${REV}ERROR:${NORM} Failed to determine absolute path to the output folder"
  exit 1
fi

# If the output folder exists, find an available folder by incrementing a
# counter at the end of the folder name
i=0
if [[ -d "$OUTPUT_FOLDER" ]]; then
  while [[ -d "${OUTPUT_FOLDER}.${i}" ]]; do
    i=$((i + 1))
  done
  OUTPUT_FOLDER="${OUTPUT_FOLDER}.${i}"
fi
OUTPUT_DTO_FILE="$OUTPUT_FOLDER/matrix_dto.xml"

# Create output folders. The quoted prefix keeps a path with spaces intact
# while the brace expansion still produces both sub-folders.
if ! mkdir -pv -- "$OUTPUT_FOLDER"/{matrices,statistics}; then
  echo -e "\\n${REV}ERROR:${NORM} Failed to create the output folders"
  exit 1
fi
# Verify the stats-folder and matrix-folder assumptions made by the sed
# expression below before anything is written.
# This could also be accomplished by sed returning an error if no match / the file paths were not changed
# http://stackoverflow.com/questions/15965073/return-code-of-sed-for-no-match
if ! grep -q "<stats-folder>.*statistics</stats-folder>" "$REFERENCE_DTO_FILE" \
  || ! grep -q "<matrix-folder>.*</matrix-folder>" "$REFERENCE_DTO_FILE"; then
  echo -e "\\n${REV}ERROR:${NORM} The reference DTO file does not contain a stats-folder and/or matrix-folder element. This script assumes they exist."
  exit 1
fi

# Replace matrices and statistics paths in matrix_dto.xml with the $OUTPUT_FOLDER/{matrices,statistics} paths
# NOTE: the regex assumes the xml has stats-folder and matrix-folder elements with existing paths
#
# The folder is inserted into the replacement half of an s::: command, where
# a backslash, '&' and the ':' delimiter are special, so those characters are
# backslash-escaped first.
escaped_output_folder=$(printf '%s' "$OUTPUT_FOLDER" | sed -e 's/[\\&:]/\\&/g')
if ! sed -E 's:(<stats-folder>|<matrix-folder>)(.*)/(statistics|matrices)(</stats-folder>|</matrix-folder>):\1'"${escaped_output_folder}"'/\3\4:g' "$REFERENCE_DTO_FILE" > "$OUTPUT_DTO_FILE"; then
  echo -e "\\n${REV}ERROR:${NORM} Failed to write $OUTPUT_DTO_FILE"
  exit 1
fi
# The kernprof command line embeds the profile output path. While the flags
# are parsed OUTPUT_FOLDER is not known yet, so the command is built here,
# now that the output folder has been determined.
if [[ "$PROFILE_TAG" == "kernprof" ]]; then
  PROFILE_CMD="kernprof -o $OUTPUT_FOLDER/vcf_to_matrix.lprof -l"
fi

# Command line executed by the scheduled job; PROFILE_CMD is empty unless a
# profiler was selected.
cmd="$PROFILE_CMD $VCF_TO_MATRIX --mode xml --num-threads 15 --dto-file $OUTPUT_DTO_FILE"
echo "$cmd"

# Submit the command as a job script on stdin ('-'). Only the part of qsub's
# output before the first '.' is kept as the job id.
JOB_ID=$(echo "$cmd" \
  | qsub -N "nasp_matrix.${SHA1}.${PROFILE_TAG}" -l walltime=12:00:00 \
    -o "$OUTPUT_FOLDER/stdout.log" -e "$OUTPUT_FOLDER/stderr.log" - \
  | cut -d . -f 1)
if [[ -z "$JOB_ID" ]]; then
  # qsub printed nothing on stdout, so no job was queued.
  echo -e "\\n${REV}ERROR:${NORM} Failed to submit the job with qsub"
  exit 1
fi
#$cmd
## Compare the output against reference output
#if [ -e /home/jtravis/.bashrc ]; then
# echo "dref job:"
# # Load the dref function from .bashrc and use it to compare the vcf_to_matrix output against a reference folder
# echo "source /home/jtravis/.bashrc && fail_vcf_to_matrix $OUTPUT_FOLDER" | qsub -N "dref.${JOB_ID}" -W depend=afterok:${JOB_ID} -o /dev/null -e /dev/null -
#fi
#watch qstat
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment