Created
November 12, 2010 19:51
-
-
Save stephenturner/674574 to your computer and use it in GitHub Desktop.
impute_step0.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# For running step0 of the imputation procedure
# Splits files and creates random subset for step 1 of imputation
# adapted from original script written by Jacki Buros
# arguments are
#   1) plink bed source file
#   2) prefix for output files

# check number of arguments: BOTH the source basename and the output
# prefix are required (the original test only verified $1, so an empty
# prefix slipped through and produced files named "__complete" etc.)
E_BADARGS=65
if [ -z "$1" ] || [ -z "$2" ]
then
    echo "Usage: $(basename "$0") <file basename> <output prefix>" >&2
    exit $E_BADARGS
fi
# remember where we started so results can be copied back at the end
orig_dir=$(pwd)
prefix="$2"

# ---- settings ------
# Plink bed source file (used in step0 script)
SRCDIR="$(pwd)"
PLINK="$1"
# store output here
OUTDIR="${orig_dir}"
# num individuals per group in step 1 impute
SUBSETSIZE=300
# num individuals (total) per subset in step 2 impute
STEP2SIZE=200
# paths (PLINKBIN deliberately bundles its flags, so it is expanded
# unquoted at call sites)
PLINKBIN="/usr/local/plink/plink --nonfounders --noweb" #CHANGE
GAWKBIN="/usr/bin/gawk" #CHANGE
TARBIN="/bin/tar" #CHANGE
# SUBSET and COMPLETE prefixes
COMPLETE="_${prefix}_complete" # plink files containing complete bed files where parents are set to 0 0
SUBSET="_${prefix}_subset"     # plink files containing subsets of the above files
GROUP="_${prefix}_group"       # prefix for per-group id lists
USERNAME=$(whoami)
# prepare dirs for output & scratch (quoted: prefix/username may
# contain characters the shell would otherwise split or glob)
mkdir -p "${OUTDIR}"
scratch="/scratch/${USERNAME}/${prefix}_impute" #CHANGE???
mkdir -p "${scratch}"
# if passing a pedigree file need to convert it to a binary file
if [ -f "${PLINK}.ped" ]
then
    echo "Creating PLINK binary files"
    $PLINKBIN --file "${PLINK}" --map3 --allow-no-sex --make-bed --out "${PLINK}"
fi

echo "$(date) | Copying source files in $SRCDIR to ${scratch}"
# abort if the cd fails: everything below (including rm ${PLINK}.*)
# must run inside SRCDIR, never in whatever directory we happen to be in
cd "${SRCDIR}" || { echo "Cannot cd to ${SRCDIR}" >&2; exit 1; }
cp -a --dereference "${PLINK}".* "${scratch}"

echo "$(date) | Preparing base PLINK bed file (named ${COMPLETE})"
# pull family ids and individual ids from the fam file and zero the parents
$GAWKBIN '{print $1,$2,"0","0"}' "${PLINK}.fam" > _update_parents
# plink command to update the parental info with zeros
$PLINKBIN --bfile "$PLINK" --update-parents _update_parents --set-hh-missing --allow-no-sex --make-bed --out "$COMPLETE" #> /dev/null
# keep a copy of the original fam file (with real parental ids) for the
# subset selection below, then remove the now-redundant source files
FAMFILE="temp.${PLINK}.fam"
cp "${PLINK}.fam" "${FAMFILE}"
rm "${PLINK}".*
echo "$(date) | Preparing subset PLINK bed file (named ${SUBSET}) to be used in Step 1"
# Randomly select SUBSETSIZE individuals (FID IID pairs) from the fam
# file for use in model estimation. shuf yields an unbiased random
# sample; the original decorate-with-$RANDOM-then-sort pipeline sorted
# the random keys LEXICALLY ("9" > "10000"), biasing the selection, and
# spawned a process per individual.
cut -d' ' -f 1-2 "$FAMFILE" | shuf -n "$SUBSETSIZE" > subset.iids
$PLINKBIN --bfile "${COMPLETE}" --keep subset.iids --make-bed --out "${SUBSET}"
# -f: don't complain when a pattern matches no files
rm -f *.ped *.map *.log *.hh
echo "$(date) | Preparing list of ids per subset (named ${GROUP}*) to be used in step 2"
$GAWKBIN '{print $1,$2}' "${COMPLETE}.fam" > _idlist
# split the id list into numbered chunks of STEP2SIZE individuals each
split -d -l "$STEP2SIZE" _idlist "$GROUP"

echo "$(date) | Preparation complete; copy files to $OUTDIR"
# use the configured tar binary (TARBIN was defined above but never used)
$TARBIN cfz "${prefix}_step0.tar.gz" "${COMPLETE}".* "${SUBSET}".* "${GROUP}"*
rm -f "${GROUP}"*
mv "${prefix}_step0.tar.gz" "${OUTDIR}"

echo "$(date) | copy remaining files to $OUTDIR/rsync & clean up"
mkdir -p "${OUTDIR}/rsync"
# test rsync's status directly rather than via $? after the fact
if rsync -avz "${scratch}/" "${OUTDIR}/rsync/" ; then
    # Let's clean up. ${scratch:?} aborts instead of expanding to ""
    # and letting rm -rf loose on the wrong path.
    cd "${orig_dir}" || exit 1
    rm -rf "${scratch:?}"
else
    # diagnostics belong on stderr
    echo "Unable to sync remaining files." >&2
    echo "Please ssh to $(hostname) and" >&2
    echo "look at the content of ${scratch}" >&2
fi

echo "=========================================================="
echo "Finished on : $(date)"
echo "=========================================================="
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment