Last active
September 8, 2018 21:40
-
-
Save dinovski/cb8539af2d2f3a78b7018dad20ab0ae4 to your computer and use it in GitHub Desktop.
get common variants from dbSNP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/env/ python | |
usage=""" | |
# get dbSNP file | |
wget --timestamping 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/snp147Common.txt.gz' -O snp147Common.txt.gz | |
python dbsnp_clean.py <snp147Common.txt> <snp147common_clean.bed> | |
""" | |
if len(sys.argv) != 3: | |
print usage | |
sys.exit() | |
import sys | |
infile = open(sys.argv[1],'r') | |
outfile = open(sys.argv[2],'w') | |
for line in infile: | |
record = line.split("\t") | |
## exclude indels | |
if not "single" in record[11]: | |
continue | |
chrom = record[1] | |
new_chrom = chrom.replace('chr','') | |
end = record[3] | |
start = int(end) - 1 | |
rsid = record[4] | |
num_alleles = record[21] | |
alleles = record[22] | |
an = record[23] | |
af = record[24] | |
#for i in range(len(record)): | |
# print(i, record[i]) | |
## get major allele based on AF | |
my_af = af.rstrip(",").split(",") | |
my_alleles = alleles.rstrip(",").split(",") | |
## get index for max AF | |
major_ind = my_af.index(max(my_af)) | |
major_allele = my_alleles[major_ind] | |
major_freq = my_af[major_ind] | |
""" | |
if not len(my_alleles) == len(my_af): | |
print record[4] | |
print "ALLELES: %s" % my_alleles | |
print "allele length: %s" % len(my_alleles) | |
print "af length %s" % len(my_af) | |
print "freqs: %s" % my_af | |
#print "index: %s" % major_ind | |
#print "major: %s" % major_allele | |
#print "freq: %s" % major_freq | |
""" | |
new_record=new_chrom + "\t" + str(start) + "\t" + end + "\t" + rsid + "\t" + major_allele + "\t" + major_freq + "\n" | |
outfile.write(new_record) | |
infile.close() | |
outfile.close() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment