Created
April 26, 2019 17:01
-
-
Save nievergeltlab/26de0edbb8a76481368101a2b2ed164e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import math | |
######################################################## | |
# plink2metasoft.py | |
# Convert Plink .assoc files to Metasoft input file | |
# Free license -- you are free to use it in any ways | |
# Buhm Han (2012) | |
######################################################## | |
import sys, subprocess, os | |
comple = {'A':'T', 'T':'A', 'G':'C', 'C':'G'} | |
# PROCESS ARGUMENTS | |
if len(sys.argv) < 3: | |
print_and_exit() | |
out=sys.argv[1] | |
files=sys.argv[2:] | |
# READ FILES | |
studies=[] | |
for f in files: | |
study={} | |
fin=open(f) | |
colnames=fin.next().split() | |
for line in fin: | |
snp={} | |
for (x,y) in zip(colnames,line.split()): | |
snp[x]=y | |
rsid=snp['SNP'] | |
study[rsid]=snp | |
fin.close() | |
studies.append(study) | |
# UNION OF SNPS | |
allsnps={} | |
for study in studies: | |
for rsid, snp in study.iteritems(): | |
allsnps[rsid]=snp | |
# SORT SNPS | |
rsids=sorted(allsnps.keys(), | |
) # key=lambda x:(allsnps[x]['CHR'],int(allsnps[x]['BP']))) | |
# MERGE STUDIES | |
fout=open(out+'.meta','w') #open(out+'.meta.tmp','w') | |
fmap=open(out+'.mmap','w') | |
flog=open(out+'.log','w') | |
for rsid in rsids: | |
output=rsid+'\t' | |
pivot=-1 | |
pivotstudyindex=-1 | |
numstudy=0 | |
for study in studies: | |
studyindex=studies.index(study)+1 | |
if rsid in study: | |
snp=study[rsid] | |
if 'OR' in snp: | |
beta1=float(snp['OR']) | |
if beta1 == 0 : | |
beta='NA' | |
stderr='NA' | |
#Do not convert to log scale if OR is 0, just consider it invalid. | |
else: | |
beta=math.log(beta1) | |
beta=str(beta) #'log(%s)'%snp['OR'] | |
stderr=snp['SE'] | |
elif 'BETA' in snp: | |
beta=snp['BETA'] | |
stderr=snp['SE'] | |
else: | |
assert 0, 'OR or BETA must be in columns' | |
#if 'P' in snp: | |
#p=snp['P'] | |
#else: | |
#assert 0, 'P must be in columns' | |
#'abs(%s/qnorm(%s/2))'%(beta,p) | |
#add some error handling to deal with NA ses... | |
se1=snp['SE'] | |
if se1 != "NA": | |
se1=float(se1) | |
else: | |
stderr='NA' | |
if se1 == 0: | |
stderr='NA' | |
print 'SE of 0 found for ' , snp['SNP'] | |
if pivot == -1: | |
pivot=snp # 1ST STUDY's SNP INFO IS PIVOT | |
pivotstudyindex=studyindex | |
else: | |
# CHECK ALLELE TO PIVOT | |
if 'A2' in pivot and 'A2' in snp and beta != "NA": | |
if pivot['A1'] == snp['A1'] and \ | |
pivot['A2'] == snp['A2']: | |
# GOOD | |
pass | |
elif pivot['A1'] == snp['A2'] and \ | |
pivot['A2'] == snp['A1']: | |
# SIMPLE FLIP | |
beta=str(-1*float(beta)) | |
elif snp['A1'] not in comple or snp['A2'] not in comple: | |
beta='NA' | |
stderr='NA' | |
#Adam: Really hard to deal with multi allelics on different strands, i won't even bother with this shit | |
flog.write('Due to non acgt, cant align alleles for %s in study %d\n'%(rsid,studyindex)) | |
elif pivot['A1'] == comple[snp['A1']] and \ | |
pivot['A2'] == comple[snp['A2']]: | |
# STRAND INCONSIS., BUT GOOD | |
flog.write('FLIP_STRAND %s in study %d\n'%(rsid,studyindex)) | |
elif pivot['A1'] == comple[snp['A2']] and \ | |
pivot['A2'] == comple[snp['A1']]: | |
# STRAND INCONSIS., SIMPLE FLIP | |
flog.write('FLIP_STRAND %s in study %d\n'%(rsid,studyindex)) | |
beta=str(-1*float(beta)) | |
else: | |
print 'SNP' , snp['SNP'], 'Error. Expected alleles', pivot['A1'], pivot['A2'],' but got', snp['A1'], snp['A2'] | |
flog.write('EXCLUDE %s due to allele inconsistency: A1:%s A2:%s in study %d but A1:%s A2:%s in study %d\n' | |
%(rsid, pivot['A1'], pivot['A2'], pivotstudyindex, | |
snp['A1'], snp['A2'], studyindex)) | |
beta='NA' | |
stderr='NA' | |
#If you can't flip it, set it to NA. | |
else: | |
if pivot['A1'] == snp['A1']: | |
# GOOD | |
#This code exists if you DO NOT have an A2 allele in your results - just assume that if A1 matches both, that the second allele is OK. | |
pass | |
else: | |
flog.write('EXCLUDE %s due to allele inconsistency: A1:%s in study %d but A1:%s in study %d\n' | |
%(rsid, pivot['A1'], pivotstudyindex, | |
snp['A1'], studyindex)) | |
beta='NA' | |
stderr='NA' | |
#If you can't flip it, set it to NA. | |
# CHECK CHR & BP TO PIVOT | |
#if pivot['CHR'] != snp['CHR']: | |
# flog.write('WARNING %s has chr inconsistency\n'%rsid) | |
#if pivot['BP'] != snp['BP']: | |
# flog.write('WARNING %s has basepair inconsistency\n'%rsid) | |
output+=beta+' '+stderr+' ' | |
numstudy+=1 | |
else: | |
output+='NA NA ' | |
#If a marker isn't found, just output NA | |
if numstudy == 1: | |
flog.write('EXCLUDE %s due to being in single study\n'%rsid) | |
else: | |
fout.write(output+'\n') | |
fmap.write('%s\t%s\t%d\n'% # fmap.write('%s\t%s\t%s\t%s\t%d\n'% | |
(rsid, pivot['A1'], numstudy)) # (rsid, pivot['CHR'], pivot['BP'], pivot['A1'], numstudy)) | |
fout.close() | |
fmap.close() | |
flog.close() | |
# CALL R TO EVALUATE MATH EXPRESSION | |
#subprocess.call(['R --vanilla --slave "--args '+out+'.meta.tmp '+out+'.meta" < plink2metasoft_subroutine.R'],shell=True) | |
#subprocess.call(['rm '+out+'.meta.tmp'], shell=True) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment