This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
zcat /mnt/ukbb/adam/ptsd/ehr_reformat/74_dai2/daner_iPSYCH2015_PTSDbroad_chr1to22_HRC_MAF01.gz | awk '{print $2,$4,$5,$7}' | LC_ALL=C sort -k1b,1 > /mnt/ukbb/adam/ptsd/ehr_reformat/74_dai2/daner_iPSYCH2015_PTSDbroad_chr1to22_HRC_MAF01.afs | |
LC_ALL=C join -1 1 -2 2 <(cat /mnt/ukbb/adam/ptsd/ehr_reformat/74_dai2/daner_iPSYCH2015_PTSDbroad_chr1to22_HRC_MAF01.afs) <(LC_ALL=C sort -k2b,2 95_biom/PTSD_broad_EA_clean.head.assoc.logistic) > 95_biom/PTSD_broad_EA_clean.head.assoc.logistic.merged | |
awk '{FRQ=$4; A1=$7; if(A1==$2) { A2=$3} else if(A1 != $2){ A2=$2; FRQ=1-$4}; print $0,A2,FRQ}' 95_biom/PTSD_broad_EA_clean.head.assoc.logistic.merged | grep -v -E "A T$|T A$|C G$|G C$" > 95_biom/PTSD_broad_EA_clean.head.assoc.logistic.merged.gwas1 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
d1 <- fread('rgdifferences.csv',data.table=F) | |
#772 traits, so 0.05/772 = 6.476684e-05 | |
#I like it better with all correlatios shown. I';ll grey out the NS ones! | |
d1$color <- "grey" | |
d1[which(d1$p <= 6.476684e-05 | d1$p_trauma <= 6.476684e-05),]$color <- "black" #significant in at least one | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#LC_ALL=C join <(awk '{print $1}' w_hm3.snplist.sorted) <(awk '{print $22,$1}' eur_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.txt.annotated | LC_ALL=C sort -k1b,1) | awk '{print $2,$1}' | LC_ALL=C sort -k1b,1 > w_hm3.snplist.linked | |
for anc in his # eur aam asn his # "$anc" | |
do | |
#important to use ref and alt1 for this! | |
cat vcf_header1b.txt vcf_header2.txt <(awk '{OFS="\t"}{if (NR>1) print $1,$2, ".", $4,$5 , "100", "PASS", "MVP="$3}' "$anc"_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa | sort -n -k1r,1 -k2,2 ) > "$anc"_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.vcf | |
bcftools view "$anc"_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.vcf -Oz -o "$anc"_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.vcf.gz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#LC_ALL=C join <(awk '{print $1}' w_hm3.snplist.sorted) <(awk '{print $22,$1}' eur_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.txt.annotated | LC_ALL=C sort -k1b,1) | awk '{print $2,$1}' | LC_ALL=C sort -k1b,1 > w_hm3.snplist.linked | |
for anc in eur aam asn his # "$anc" | |
do | |
#important to use ref and alt1 for this! | |
cat vcf_header1b.txt vcf_header2.txt <(awk '{OFS="\t"}{if (NR>1) print $1,$2, ".", $4,$5 , "100", "PASS", "MVP="$3}' "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa | sort -n -k1r,1 -k2,2 ) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf | |
bcftools view "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf -Oz -o "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Do a kcolumn split of local ancestry | |
#Do a delimiting of | then kcolumn split of haplotypes vcf | |
zcat gtpc_phased_chr22.vcf.gz | tail -n+11 | sed 's/|/\t/g' > gtpc_phased_chr22.vcf.haps | |
zcat gtpc_phased_chr22.vcf.gz | head -n10 | tail -n1 | sed 's/#//g' | awk '{$1=$1; print}' > gtpc_phased_chr22.vcf.haps.header | |
#Notice that the odd/even selection REMOVES the columns, rather than selecting them, hence the file names are haps1 and haps0 | |
awk '{ for (i=10;i<=NF;i+=2) $i="" } 1' gtpc_phased_chr22.vcf.haps | cat gtpc_phased_chr22.vcf.haps.header - > gtpc_phased_chr22.vcf.haps1 | |
awk '{ for (i=11;i<=NF;i+=2) $i="" } 1' gtpc_phased_chr22.vcf.haps | cat gtpc_phased_chr22.vcf.haps.header - > gtpc_phased_chr22.vcf.haps0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#THe first step may be to just filter down the list of subjects. | |
#cat betr/qc1/dos_pts_betr_mix_am-qc.hg19.ch.fl.chr10_009_012.out.dosage.fam vets/qc1/dos_pts_vets_mix_am-qc.hg19.ch.fl.chr10_009_012.out.dosage.fam > test.fam | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#Big note: | |
#The plink --fam d.fam --dosage c.txt list sepheader Zin --write-dosage command doesnt seem to work to combine male and female files | |
#Doing this will make each SNP appear to be a different SNP for the male and female files (in other words, N SNPs doubles, and will be ALL NA for | |
#either men or women | |
#Instead do the traditional command saying that the subjects are split but not hte SNPs, but make a temporary file where the SNPs are subset to onyl the | |
#intersecting set! | |
#Instead |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Find ancestry informative markers, using 1000 Genomes reference data as an example | |
#Use global ancestry as a phenotype for association analysis in unrelated KGP subjects | |
#Perform LD based clumping so that in each LD block, the strongest AIMs are selected | |
#Making sure that no indels or ambiguous markers are used | |
#Liftover markers (if you care) | |
#Get subject populations | |
#Extract only AFR and EA samples from the 1000G VCF data, on non monomorphic markers |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Rename duplicate rs-ids in a .bim file | |
#Sometimes you get .bim files where rs-ids are not unique. This renames the duplicates. Preference to not rename is given in order of first marker listed. | |
#V1 - Oct 28, 2016 | |
args <- commandArgs(trailingOnly = TRUE) | |
qced_bim <- args[1] | |
print("Reading .bim file") | |
dat1 <- read.table(qced_bim,stringsAsFactors=F,header=F) #Load the QCed SNP list | |
names(dat1) <- c("CHR","SNP","cM","BP","A1","A2") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
study=prom | |
for chr in {1..22} | |
do | |
plink2 --bfile pts_PROM_mix_am-qc --chr $chr --make-bed --out data/"$study"_$chr | |
done | |