Created
January 14, 2016 15:47
-
-
Save 0x3bfc/4257ea181c155d7e489b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os | |
import sys | |
import subprocess | |
""" | |
The list_dirs function below is inspired by this Stack Overflow question:
http://stackoverflow.com/questions/23091077/get-all-nested-directories-in-a-folder-python | |
""" | |
def list_dirs(path):
    """Yield every leaf directory found beneath ``path``.

    A leaf directory is one that lists no sub-directories of its own.
    Directories that do contain sub-directories are never yielded, and
    neither is ``path`` itself.

    Args:
        path: Root of the tree to scan; may be relative or absolute.

    Yields:
        The path of each leaf directory, built by joining onto ``path``.
    """
    # Directories known to contain at least one sub-directory.
    seen = set()
    # Bottom-up walk: every directory is visited before its parent, so by
    # the time a parent inspects a child, that child has already recorded
    # itself in ``seen`` if it has sub-directories.
    for root, dirs, _files in os.walk(path, topdown=False):
        if not dirs:
            continue
        # Recording ``root`` alone is sufficient. The previous version also
        # climbed os.path.dirname() in a ``while parent:`` loop, which never
        # terminates for an absolute path because dirname('/') == '/'.
        seen.add(root)
        for name in dirs:
            child = os.path.join(root, name)
            if child not in seen:
                yield child
# Execute a shell command line
def execute(command, option=None):
    """Run ``command`` through the shell and return its captured output.

    Args:
        command: Shell command line. It is handed to the shell verbatim
            (``shell=True``), so the caller is responsible for quoting.
        option: Kept for backward compatibility. Callers used to pass
            ``'wait'`` to block until the command finished; the function
            now always waits, so the value is ignored.

    Returns:
        The command's standard output, a newline, then its standard error,
        as a single text string.
    """
    proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        # Text mode, so the result can be joined with "\n" on both
        # Python 2 and Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes while waiting for the process to
    # exit. Calling wait() first, or reading stdout to EOF before touching
    # stderr, can deadlock once the other pipe's buffer fills up.
    out, err = proc.communicate()
    return out + "\n" + err
# Merge all VCF files into one file
def vcf_merge(base_dir, output):
    """Compress, index and merge the ``.vcf`` files found under ``base_dir``.

    Every ``.vcf`` file in each directory yielded by ``list_dirs(base_dir)``
    is compressed with ``bgzip`` and indexed with ``tabix`` in its own
    directory (bgzip replaces the source file with ``<name>.gz``). Copies are
    staged in a temporary ``merge_dir`` next to this script, merged with
    ``vcf-merge``, and the merged file is itself compressed and indexed, then
    moved next to this script. ``merge_dir`` is removed at the end.

    Args:
        base_dir: Directory tree to scan; relative or absolute.
        output: Name of the merged VCF file. The final artefacts are
            ``<output>.gz`` and ``<output>.gz.tbi``.

    Note:
        The external tools ``bgzip``, ``tabix`` and ``vcf-merge`` must be on
        PATH. Their exit statuses are not checked here.
    """
    # Local import keeps the block self-contained and works on both
    # Python 3 (shlex.quote) and Python 2 (pipes.quote).
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    merge_dir = os.path.join(cur_dir, 'merge_dir')
    execute("mkdir -p %s" % quote(merge_dir))

    # The counter is bumped for every scanned directory and prefixed to the
    # staged file names so equally named files from different directories
    # do not overwrite each other.
    for i, cdir in enumerate(list_dirs(base_dir), 1):
        # Use the path as yielded: prefixing './' breaks absolute paths.
        for filename in os.listdir(cdir):
            if not filename.endswith('.vcf'):
                continue
            gz_name = filename + '.gz'
            staged = os.path.join(merge_dir, '%d-%s' % (i, gz_name))
            # All path components are shell-quoted so names containing
            # spaces or shell metacharacters are handled safely.
            execute(
                'cd %s && bgzip %s && tabix -p vcf %s'
                ' && cp %s %s && cp %s %s'
                % (
                    quote(cdir), quote(filename), quote(gz_name),
                    quote(gz_name), quote(staged),
                    quote(gz_name + '.tbi'), quote(staged + '.tbi'),
                )
            )

    # merge all vcf files into one large vcf file
    # Sorted so the argument order passed to vcf-merge is deterministic
    # rather than following os.listdir()'s arbitrary order.
    vcfs = sorted(
        name for name in os.listdir(merge_dir) if name.endswith('.gz')
    )
    if vcfs:
        merge_args = ' '.join(quote(name) for name in vcfs)
        execute(
            "cd %s && vcf-merge %s > %s"
            % (quote(merge_dir), merge_args, quote(output))
        )
        execute(
            "cd %s && bgzip %s && tabix -p vcf %s && mv %s ../ && mv %s ../"
            % (
                quote(merge_dir), quote(output), quote(output + '.gz'),
                quote(output + '.gz'), quote(output + '.gz.tbi'),
            )
        )
    # With nothing staged, vcf-merge is skipped so no bogus output file is
    # produced.

    # remove tmp merge directory
    execute("rm -rf %s" % quote(merge_dir))
def help():
    """Return the usage and installation notes shown to the user.

    The name shadows the ``help`` builtin; it is kept because the script's
    entry point calls it by this name.

    Returns:
        A multi-line string with prerequisite notes and the usage line.
    """
    return """
    Note:
        - Make sure that you have installed vcftools
            $ sudo apt-get install vcftools
        - Please note that you have to move your current directory
          to the directory of vcf files
    Usage:
        ./vcf-merger . <output file>
    """
if __name__ == "__main__":
    # Command-line entry point: vcf-merger <base_dir> <output file>
    args = sys.argv
    if len(args) < 2:
        # No arguments at all: show the usage text and exit cleanly.
        print(help())
        sys.exit(0)
    if len(args) < 3:
        # The output file name is missing. This used to surface as a
        # swallowed IndexError; the outcome (usage text, exit status -1)
        # is unchanged.
        print(help())
        sys.exit(-1)
    base_dir, output = args[1], args[2]
    try:
        # call vcf merge
        vcf_merge(base_dir, output)
    except Exception as err:
        # Report the real failure instead of hiding it behind the usage
        # text. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        sys.stderr.write("vcf-merger: %s\n" % err)
        sys.exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment