Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Last active February 11, 2021 14:29
Show Gist options
  • Save explodecomputer/ab3552ee06b5c0df76139fc587373599 to your computer and use it in GitHub Desktop.
Save explodecomputer/ab3552ee06b5c0df76139fc587373599 to your computer and use it in GitHub Desktop.
Calculate PCs in ALSPAC data
($1 == 1) && ($4 >= 48227413) && ($4 <= 52227412) {print $2}
($1 == 2) && ($4 >= 86000000) && ($4 <= 100500000) {print $2}
($1 == 2) && ($4 >= 183291755) && ($4 <= 190291755) {print $2}
($1 == 3) && ($4 >= 47524996) && ($4 <= 50024996) {print $2}
($1 == 3) && ($4 >= 83417310) && ($4 <= 86917310) {print $2}
($1 == 5) && ($4 >= 128972101) && ($4 <= 131972101) {print $2}
($1 == 5) && ($4 >= 44500000) && ($4 <= 50500000) {print $2}
($1 == 6) && ($4 >= 57000000) && ($4 <= 64000000) {print $2}
($1 == 6) && ($4 >= 25392021) && ($4 <= 33392022) {print $2}
($1 == 6) && ($4 >= 139958307) && ($4 <= 142458307) {print $2}
($1 == 7) && ($4 >= 55000000) && ($4 <= 66000000) {print $2}
($1 == 8) && ($4 >= 7962590) && ($4 <= 11962591) {print $2}
($1 == 8) && ($4 >= 111930824) && ($4 <= 114930824) {print $2}
($1 == 8) && ($4 >= 43000000) && ($4 <= 50000000) {print $2}
($1 == 10) && ($4 >= 37000000) && ($4 <= 43000000) {print $2}
($1 == 11) && ($4 >= 87860352) && ($4 <= 90860352) {print $2}
($1 == 12) && ($4 >= 33000000) && ($4 <= 40000000) {print $2}
($1 == 20) && ($4 >= 32536339) && ($4 <= 35066586) {print $2}
($1 == 1) && ($4 >= 48287981) && ($4 <= 52287979) {print $2}
($1 == 2) && ($4 >= 86088343) && ($4 <= 101041482) {print $2}
($1 == 2) && ($4 >= 134666269) && ($4 <= 138166268) {print $2}
($1 == 2) && ($4 >= 183174495) && ($4 <= 190174494) {print $2}
($1 == 3) && ($4 >= 47524997) && ($4 <= 50024996) {print $2}
($1 == 3) && ($4 >= 83417311) && ($4 <= 86917310) {print $2}
($1 == 3) && ($4 >= 88917311) && ($4 <= 96017310) {print $2}
($1 == 5) && ($4 >= 44464244) && ($4 <= 50464243) {print $2}
($1 == 5) && ($4 >= 97972101) && ($4 <= 100472101) {print $2}
($1 == 5) && ($4 >= 128972102) && ($4 <= 131972101) {print $2}
($1 == 5) && ($4 >= 135472102) && ($4 <= 138472101) {print $2}
($1 == 6) && ($4 >= 25392022) && ($4 <= 33392022) {print $2}
($1 == 6) && ($4 >= 56892042) && ($4 <= 63942041) {print $2}
($1 == 6) && ($4 >= 139958308) && ($4 <= 142458307) {print $2}
($1 == 7) && ($4 >= 55225792) && ($4 <= 66555850) {print $2}
($1 == 8) && ($4 >= 7962591) && ($4 <= 11962591) {print $2}
($1 == 8) && ($4 >= 42880844) && ($4 <= 49837447) {print $2}
($1 == 8) && ($4 >= 111930825) && ($4 <= 114930824) {print $2}
($1 == 10) && ($4 >= 36959995) && ($4 <= 43679994) {print $2}
($1 == 11) && ($4 >= 46043425) && ($4 <= 57243424) {print $2}
($1 == 11) && ($4 >= 87860353) && ($4 <= 90860352) {print $2}
($1 == 12) && ($4 >= 33108734) && ($4 <= 41713733) {print $2}
($1 == 12) && ($4 >= 111037281) && ($4 <= 113537280) {print $2}
($1 == 20) && ($4 >= 32536340) && ($4 <= 35066586) {print $2}
#!/bin/bash
data="../data/bestguess/data"
# Generate GRM
plink1.90 --bfile ${data} --make-grm-bin --maf 0.01 --out ../data/derived/kinships/data
# Get Mother and children ID lsits
awk '{ print $1, $2 }' ${data}.fam | grep "M" > mothers.txt
awk '{ print $1, $2 }' ${data}.fam | grep -v "M" > children.txt
# Get unrelateds
gcta64 --grm ../data/derived/kinships/data --keep mothers.txt --grm-cutoff 0.05 --make-grm --out ../data/derived/unrelated_ids/mothers
gcta64 --grm ../data/derived/kinships/data --remove mothers.txt --grm-cutoff 0.05 --make-grm --out ../data/derived/unrelated_ids/children
rm ../data/derived/unrelated_ids/children.grm.bin ../data/derived/unrelated_ids/children.grm.N.bin
rm ../data/derived/unrelated_ids/mothers.grm.bin ../data/derived/unrelated_ids/mothers.grm.N.bin
mv ../data/derived/unrelated_ids/mothers.grm.id ../data/derived/unrelated_ids/mothers/inclusion_list.txt
mv ../data/derived/unrelated_ids/children.grm.id ../data/derived/unrelated_ids/children/inclusion_list.txt
# Make inclusion list
awk '{ print $1 }' ../data/derived/unrelated_ids/children/inclusion_list.txt > children_unrel.txt
awk '{ print $1 }' ../data/derived/unrelated_ids/mothers/inclusion_list.txt > mothers_unrel.txt
# Make exclusion list
fgrep -vwf children_unrel.txt children.txt > ../data/derived/unrelated_ids/children/exclusion_list.txt
fgrep -vwf mothers_unrel.txt mothers.txt > ../data/derived/unrelated_ids/mothers/exclusion_list.txt
# Get snp list with no long range LD regions
awk -f ld.awk ${data}.bim > nold.txt
# Get independent SNPs excluding any long range LD regions
plink1.90 --bfile ${data} --exclude nold.txt --indep 100 5 1.01 --out indep
# Calculate PCs for unrelateds
plink1.90 --bfile ${data} --keep ../data/derived/unrelated_ids/mothers/inclusion_list.txt --extract indep.prune.in --pca 20 --out ../data/derived/principal_components/mothers/data
plink1.90 --bfile ${data} --keep ../data/derived/unrelated_ids/children/inclusion_list.txt --extract indep.prune.in --pca 20 --out ../data/derived/principal_components/children/data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment