Created
November 12, 2010 20:03
-
-
Save stephenturner/674597 to your computer and use it in GitHub Desktop.
impute_step1.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Jacki Buros
# Step 1 impute
#
# NOTE: every exit path of this script is a top-level `return`, so it must be
# sourced from a calling shell; it cannot be executed directly.

# check params

#######################################
# Verify that the required parameters are set and that the input files exist.
# Checks run in a fixed order and stop at the first failure.
# Globals:   prefix, chr, s1out, s0file, REFHAPS, REFSNPS (read)
#            ERR_DELETE_FILES (read)
# Arguments: none
# Outputs:   one dated message on stdout describing the first failed check
# Returns:   0 when every check passes; otherwise ERR_DELETE_FILES
#            (1 when ERR_DELETE_FILES itself is unset, so that a failure is
#            never reported as success)
#######################################
_impute_step1_check_params() {
  local param_name

  # Indirect expansion (${!param_name}) reads the variable whose name is held
  # in param_name, replacing six copy-pasted "is it empty?" checks.
  for param_name in prefix chr s1out s0file REFHAPS REFSNPS; do
    if [[ -z "${!param_name}" ]]; then
      echo "$(date) | param '${param_name}' not defined. Exiting."
      return "${ERR_DELETE_FILES:-1}"
    fi
  done

  if [[ ! -e "$REFHAPS" ]]; then
    echo "$(date) | reference haplotypes file (${REFHAPS}) does not exist. Exiting."
    return "${ERR_DELETE_FILES:-1}"
  fi

  if [[ ! -e "$REFSNPS" ]]; then
    echo "$(date) | reference haplotypes snps file (${REFSNPS}) does not exist. Exiting."
    return "${ERR_DELETE_FILES:-1}"
  fi

  if [[ ! -e "$s0file" ]]; then
    echo "$(date) | step 0 tar file (${s0file}) does not exist in local directory. Exiting."
    return "${ERR_DELETE_FILES:-1}"
  fi

  return 0
}

# Stop the sourced script with the check's status on the first failure.
_impute_step1_check_params || return $?
# check executables

#######################################
# Verify that the variables naming the external programs are set.
# Only non-emptiness is checked; the programs themselves are not probed.
# Globals:   MACHBIN, PLINKBIN, GAWKBIN, TARBIN (read)
#            ERR_DELETE_FILES (read)
# Arguments: none
# Outputs:   one dated message on stdout naming the first missing variable
# Returns:   0 when all four are set; otherwise ERR_DELETE_FILES
#            (1 when ERR_DELETE_FILES itself is unset)
#######################################
_impute_step1_check_executables() {
  local bin_var

  for bin_var in MACHBIN PLINKBIN GAWKBIN TARBIN; do
    # ${!bin_var} expands the variable whose name is stored in bin_var.
    if [[ -z "${!bin_var}" ]]; then
      echo "$(date) | param '${bin_var}' not defined. Exiting."
      return "${ERR_DELETE_FILES:-1}"
    fi
  done

  return 0
}

# Stop the sourced script with the check's status on the first failure.
_impute_step1_check_executables || return $?
#######################################
# Print a start-up banner describing where and when this step is running.
# Globals:   JOB_ID, JOB_NAME, chr (read)
# Arguments: none
# Outputs:   eight lines on stdout (rule, six key/value lines, rule)
#######################################
_impute_step1_print_banner() {
  local rule="=========================================================="

  echo "$rule"
  echo "Starting on : $(date)"
  echo "Running on node : $(hostname)"
  echo "Current directory : $(pwd)"
  echo "Current job ID : $JOB_ID"
  echo "Current job name : $JOB_NAME"
  echo "Param value (chr) : $chr"
  echo "$rule"
}

_impute_step1_print_banner
# Extra command-line options for MACH, kept as a single whitespace-separated
# string (the MACH invocation later in this script expands it into words).
mach_settings=" --compact --greedy --rounds 100 "
# ---- settings ------
# SUBSET and COMPLETE prefixes
# All three prefixes are derived from the caller-supplied ${prefix} and ${chr}.
COMPLETE="_${prefix}_chr${chr}_complete" # plink binary files containing genotype data for complete sample
SUBSET="_${prefix}_chr${chr}_subset"     # plink binary files containing genotype data for step1 impute individuals only
GROUP="_${prefix}_chr${chr}_group"       # prefix for per-group id lists
file="${SUBSET}_chr${chr}"               # temp files prefix
# extract files required for this analysis

#######################################
# Unpack the SUBSET plink files from the step 0 archive, delete the archive,
# then have PLINK recode the requested chromosome into ${file}.* and remove
# the unpacked SUBSET files.
# Globals:   TARBIN, PLINKBIN, s0file, SUBSET, chr, file (read)
#            ERR_COPY_FILES (read)
# Arguments: none
# Outputs:   the PLINK command line (and a dated error message) on stdout
# Returns:   0 once PLINK has been run (its own status is not inspected here);
#            ERR_COPY_FILES (1 if that is unset) when ${SUBSET}.bed is missing
#            after extraction
#######################################
_impute_step1_extract_subset() {
  # The member pattern is quoted so that tar, not the local shell, matches it
  # against the archive contents.
  # NOTE(review): recent GNU tar only pattern-matches extracted member names
  # when --wildcards is given - confirm against the tar that TARBIN points to.
  "$TARBIN" xfz "$s0file" "${SUBSET}.*"

  # The archive is removed whether or not extraction worked (as before).
  rm -- "$s0file"

  if [[ ! -e "${SUBSET}.bed" ]]; then
    echo "$(date) | Plink file ${SUBSET}.bed does not exist (TAR extract failed or file not in tar archive ${s0file}). Exiting"
    return "${ERR_COPY_FILES:-1}"
  fi

  # filter by chrom
  echo "${PLINKBIN} --bfile ${SUBSET} --noweb --recode --set-hh-missing --chr ${chr} --out ${file}"
  "$PLINKBIN" --bfile "$SUBSET" --noweb --recode --set-hh-missing --chr "$chr" --out "$file"

  # The glob stays outside the quotes so the shell expands it to the
  # extracted SUBSET files.
  rm -- "${SUBSET}".*
  return 0
}

# Stop the sourced script when the subset could not be extracted.
_impute_step1_extract_subset || return $?
#######################################
# Derive the MACH input files from the PLINK map file ${file}.map:
#   ${file}.pmap - columns 1, 2 and 4 of every map line
#   ${file}.dat  - a "T pheno" header line followed by "M <column 2>" per line
# Globals:   GAWKBIN, file (read); ERR_COPY_FILES (read)
# Arguments: none
# Outputs:   dated progress/error messages on stdout
# Returns:   0 after both files are written (awk's status is not inspected);
#            ERR_COPY_FILES (1 if that is unset) when ${file}.map is missing
#######################################
_impute_step1_make_mach_maps() {
  echo "$(date) | Preparing maps in MACH format"

  if [[ ! -e "${file}.map" ]]; then
    # The message used to call this a "ped file"; the file checked is the map.
    echo "$(date) | Plink map file ${file}.map does not exist. Exiting"
    return "${ERR_COPY_FILES:-1}"
  fi

  "$GAWKBIN" '{print $1,$2,$4}' "${file}.map" > "${file}.pmap"
  "$GAWKBIN" 'BEGIN {print "T","pheno";}{print "M",$2;}' "${file}.map" > "${file}.dat"
  return 0
}

# Stop the sourced script when the map file is missing.
_impute_step1_make_mach_maps || return $?
#######################################
# Run MACH step 1 on ${file}.dat / ${file}.ped, writing MACH's stdout to
# ${s1out}.log, then clean up the temporary ${file}.* files on success.
# Globals:   MACHBIN, file, REFSNPS, REFHAPS, mach_settings, s1out (read)
#            OK, ERR_COPY_FILES (read)
#            machreturn (written: MACH's exit status)
# Arguments: none
# Outputs:   dated progress/error messages on stdout
# Returns:   OK (0 if unset) when MACH exits 0; ERR_COPY_FILES (1 if unset)
#            when the input files are missing or MACH exits non-zero
#######################################
_impute_step1_run_mach() {
  local -a mach_args

  if [[ ! -e "${file}.ped" || ! -e "${file}.dat" ]]; then
    echo "$(date) | Source genotype files ${file}.dat and ${file}.ped do not exist. Exiting"
    return "${ERR_COPY_FILES:-1}"
  fi

  echo "$(date) | Begin step 1 impute"

  # Split the option string into words without subjecting it to globbing.
  read -r -a mach_args <<< "$mach_settings"

  # Use ${file} for --dat/--ped so the paths are exactly the ones checked above.
  "$MACHBIN" --dat "${file}.dat" --ped "${file}.ped" \
    --snps "$REFSNPS" --haps "$REFHAPS" \
    "${mach_args[@]}" --prefix "$s1out" --autoFlip > "${s1out}.log"
  machreturn=$?

  # Report the saved status: $? at this point would be the status of the
  # assignment above (always 0), not MACH's.
  echo "$(date) | Step 1 complete, exit code = ${machreturn}"

  # Clean up
  if [[ $machreturn -ne 0 ]]; then
    # Temporary files are left in place when MACH fails (as before).
    return "${ERR_COPY_FILES:-1}"
  fi

  rm -- "${file}".*
  return "${OK:-0}"
}

# This is the last step: the sourced script always returns here.
_impute_step1_run_mach
return $?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment