Created
October 20, 2024 10:41
-
-
Save explodecomputer/3626cf4280a59d3a8f8685b6faaef19a to your computer and use it in GitHub Desktop.
Convert UK Biobank (UKB) BGEN files to best-guess PLINK format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Convert the per-chromosome BGEN files (ukb21007_c<chr>_b0_v1) for autosomes
# 1-22 into PLINK bed/bim/fam filesets, keeping only variants that pass the
# INFO/R2 and INFO/AF filters applied to the matching sites VCF.
#
# Requires: bcftools and plink2 on PATH.
#
# Outputs, written to the current directory for each chromosome:
#   variant_info_<chr>.txt    CHROM, POS, ID, REF, ALT, AF, R2 (tab-separated)
#   variant_list_<chr>.txt    variant IDs only (column 3 of the file above)
#   variant_info_<chr>.range  "chr start end id" lines passed to --extract range
#   ukb21007_b0_v1_c<chr>.*   plink2 --make-bed output

# Abort on the first failing command or pipeline stage, so plink2 is never run
# against an empty or truncated range file.
set -euo pipefail

# The directory name contains spaces and parentheses, so it must stay inside
# double quotes wherever it is expanded.
imputation_dir="/mnt/project/Bulk/Imputation/Imputation from genotype (TOPmed)"

for chr in {1..22}; do
  prefix="ukb21007_c${chr}_b0_v1"

  # Get the list of variants that pass the thresholds: drop sites with
  # R2 < 0.8, AF < 0.01 or AF > 0.99, then tabulate the survivors.
  bcftools filter -e 'INFO/R2<0.8' "${imputation_dir}/helper_files/${prefix}.sites.vcf.gz" \
    | bcftools filter -e 'INFO/AF<0.01' \
    | bcftools filter -e 'INFO/AF>0.99' \
    | bcftools query -f '%CHROM\t%POS\t%ID\t%REF\t%ALT\t%AF\t%R2\n' \
    > "variant_info_${chr}.txt"

  # Plain list of the retained variant IDs.
  cut -f 3 "variant_info_${chr}.txt" > "variant_list_${chr}.txt"

  # Build the range file with start == end == POS. `uniq -u` keeps only lines
  # that occur exactly once, so a chr/pos/ID triple that appears more than once
  # is removed entirely rather than collapsed to a single line.
  awk '{print $1, $2, $2, $3}' "variant_info_${chr}.txt" \
    | sort \
    | uniq -u \
    > "variant_info_${chr}.range"

  # Extract from plink
  plink2 \
    --bgen "${imputation_dir}/${prefix}.bgen" ref-first \
    --sample "${imputation_dir}/${prefix}.sample" \
    --extract range "variant_info_${chr}.range" \
    --maf 0.01 \
    --make-bed \
    --out "ukb21007_b0_v1_c${chr}"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment