Last active
March 16, 2017 12:39
-
-
Save explodecomputer/52cec336daba269722969cb0b0ed3fbe to your computer and use it in GitHub Desktop.
example plink
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Load plink2 module | |
module add apps/plink-1.90b3v | |
# Location of the biobank genotype data | |
datadir="/panfs/panasas01/shared-biobank/data/" | |
# Location of the user supplied phenotype file | |
phenfile="phenfile.txt" | |
# Which chromosome | |
chr="01" | |
# Default covariates | |
princomp="${datadir}/derived/principal_components/data.txt" | |
chipvariable="chipcov.txt" | |
# If either of these are not available then they should be set to "NULL" | |
discrete_covs="dcov.txt" | |
continuous_covs="ccov.txt" | |
# This file will be generated by the R script | |
combined_covs="covs.txt" | |
# IDs and SNPs to include | |
keeplist="european_unrelated_ids.txt" | |
snplist="snps_maf_info_filter.txt" | |
# Where to store the association results | |
outfile="results" | |
# IF CONTINUOUS PHENOTYPE | |
model="linear" | |
# This creates a ${.assoc.logistic file | |
# IF CASE/CONTROL PHENOTYPE | |
model="logistic" | |
# Generate the covariate file | |
Rscript make_covs.R ${chipvariable} ${princomp} ${continuous_covs} ${discrete_covs} ${combined_covs} | |
# Run plink | |
plink \ | |
--bfile ${datadir}/bestguess/data_chr${chr} \ | |
--pheno ${phenfile} \ | |
--covar ${combined_covs} \ | |
--keep ${keeplist} \ | |
--extract ${snplist} \ | |
--${model} \ | |
--out ${outfile} | |
# Keep only the additive effect estimates (not the covariate effect estimates) | |
awk 'NR==1 || /ADD/' ${outfile}.assoc.${model} > ${outfile}.txt | |
rm ${outfile}.assoc.${model} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
arguments <- commandArgs(T) | |
chipfile <- arguments[1] | |
pcfile <- arguments[2] | |
qcovfile <- arguments[3] | |
dcovfile <- arguments[4] | |
outfile <- arguments[5] | |
filelist <- c(chipfile, pcfile, qcovfile, dcovfile) | |
filelist <- filelist[filelist != "NULL"] | |
l <- lapply(filelist, function(x) { | |
a <- fread(x) | |
message(x, ": ", nrow(a), " samples") | |
names(a)[1] <- "V1" | |
names(a)[2] <- "V2" | |
return(a) | |
}) | |
# keep 10 PCs only | |
l[[2]] <- l[[2]][,1:12] | |
out <- merge(l[[1]], l[[2]], by=c("V1", "V2")) | |
if(length(l) > 2) | |
{ | |
n <- length(l) | |
for(i in 3:n) | |
{ | |
out <- merge(out, l[[i]], by=c("V1", "V2")) | |
} | |
} | |
stopifnot(all(names(out)[1:2] == c("V1", "V2"))) | |
names(out)[1:2] <- c("FID", "IID") | |
message("Samples remaining: ", nrow(out)) | |
write.table(out, file=outfile, row=FALSE, col=TRUE, quote=FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment