Created
October 24, 2015 18:32
-
-
Save explodecomputer/ea760ad1de8faf91c651 to your computer and use it in GitHub Desktop.
filtering 1kg data to hm3 and recoding snp names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Filter a PLINK binary fileset down to the SNPs listed in hm3.snplist,
# after renaming every SNP ID to chr_pos format.
#
# Steps (all files are created in WORK_DIR):
#   1. Copy the source .bed/.bim/.fam filesets to als.bed/.bim/.fam.
#   2. Rewrite column 2 of als.bim as <col1>_<col4> (backup: als.bim.orig).
#   3. Rename repeated IDs to <id>.duplicate.<n> (backup: als.bim.orig2) and
#      list the renamed IDs in duplicates.txt.
#   4. plink --exclude duplicates.txt      -> als_clean.{bed,bim,fam}
#   5. plink --extract hm3.snplist         -> als_clean_hm3.{bed,bim,fam}
#
# Requires: hm3.snplist already present in WORK_DIR, and a plink1.90
# executable on PATH once the module below has been loaded.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Load PLINK before enabling strict mode: `module` is supplied by the cluster
# environment (presumably a shell function), so its internals are kept out of
# the reach of -e/-u. Whether it worked is verified explicitly just below.
module add apps/plink-1.90b3v

set -euo pipefail

command -v plink1.90 >/dev/null || die "plink1.90 not found on PATH"

readonly WORK_DIR=/panfs/panasas01/shared/alspac/deprecated/gib/data_for_suzi
readonly SRC_PREFIX=/panfs/panasas01/shared/alspac/deprecated/alspac_combined_1kg_20140424/combined/alspac_1kg_p1v3_maf0.01_info0.8

# Without this check a failed cd would let every later step write its
# als.* files into whatever directory the script was started from.
cd "${WORK_DIR}" || die "cannot cd to ${WORK_DIR}"

# Fail before the copies rather than at the very last plink call.
# NOTE(review): the IDs in hm3.snplist presumably have to be in the same
# chr_pos format produced below for --extract to match anything -- confirm.
[[ -f hm3.snplist ]] || die "hm3.snplist not found in ${WORK_DIR}"

for ext in bed bim fam; do
  cp -- "${SRC_PREFIX}.${ext}" "als.${ext}"
done

# Recode snp ids into chr_pos format
cp -- als.bim als.bim.orig
awk '{ print $1, $1"_"$4, $3, $4, $5, $6 }' als.bim.orig > als.bim

# Remove any duplicate SNP IDs
# The first occurrence of an ID keeps its name (and therefore survives the
# --exclude below); the 2nd, 3rd, ... occurrences are renamed to
# <id>.duplicate.<n> so that every ID in the .bim is unique.
cp -- als.bim als.bim.orig2
awk '{
  if (++seen[$2] > 1) {
    print $1, $2".duplicate."seen[$2], $3, $4, $5, $6
  } else {
    print $1, $2, $3, $4, $5, $6
  }
}' als.bim.orig2 > als.bim

# Collect the renamed IDs. A single awk is used instead of `grep | awk`
# because grep exits non-zero when nothing matches, which would abort the
# script under `set -e -o pipefail` when the data contain no duplicates.
awk '/duplicate/ { print $2 }' als.bim > duplicates.txt

plink1.90 --bfile als --exclude duplicates.txt --make-bed --out als_clean
plink1.90 --bfile als_clean --extract hm3.snplist --make-bed --out als_clean_hm3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment