Skip to content

Instantly share code, notes, and snippets.

@brantfaircloth
Created May 6, 2012 01:53
Show Gist options
  • Save brantfaircloth/2606999 to your computer and use it in GitHub Desktop.
Save brantfaircloth/2606999 to your computer and use it in GitHub Desktop.
Run SATe over MPI using mpi4py_map
#!/usr/bin/env python
# encoding: utf-8
"""
File: mpi_sate.py
Author: Brant Faircloth
Created by Brant Faircloth on 04 May 2012 15:05 PDT (-0700)
Copyright (c) 2012 Brant C. Faircloth. All rights reserved.
Description: Run SATe over MPI using mpi4py_map
"""
import os
import glob
import mpimap
import shutil
import argparse
import tempfile
import subprocess
from phyluce.helpers import is_dir, FullPaths
#import pdb
def get_args():
    """Parse the command-line arguments for an MPI SATe run.

    Four positional arguments are expected, in this order: ``input``,
    ``output``, ``sate`` and ``cfg``.  ``input`` and ``output`` are checked
    with ``is_dir`` and all four are stored through the ``FullPaths``
    action.  Both helpers come from ``phyluce.helpers``; presumably they
    validate that the directory exists and expand the path to its absolute
    form -- confirm against phyluce.

    Returns:
        argparse.Namespace: namespace with the attributes ``input``,
        ``output``, ``sate`` and ``cfg``.
    """
    parser = argparse.ArgumentParser(
        description="Run SATe over MPI on every FASTA file in a directory"
    )
    parser.add_argument(
        "input",
        type=is_dir,
        action=FullPaths,
        help="Directory holding the *.fasta files to align",
    )
    parser.add_argument(
        "output",
        type=is_dir,
        action=FullPaths,
        help="Directory that receives one .aln file per input file",
    )
    parser.add_argument(
        "sate",
        action=FullPaths,
        help="Path to the SATe script that is run with python",
    )
    parser.add_argument(
        "cfg",
        action=FullPaths,
        help="SATe configuration file passed as the last SATe argument",
    )
    return parser.parse_args()
def worker(work):
    """Align the contents of one FASTA file with SATe in a scratch directory.

    Args:
        work: a 4-item sequence ``(sate, cfg, f, content)``:
            ``sate`` is the script handed to ``python``, ``cfg`` is passed
            to SATe as the final command-line argument, ``f`` is the name
            of the originating file (only echoed back so the caller can
            name its output) and ``content`` is the text written to the
            temporary input file.

    Returns:
        tuple: ``(f, aln)`` where ``aln`` is the text of the alignment file
        SATe wrote into the scratch directory.

    Raises:
        IOError: if the expected alignment file is missing after SATe ran;
            the message carries SATe's exit status and its stderr output.
    """
    sate, cfg, f, content = work
    # private scratch directory holding the input, SATe temporaries and output
    working = tempfile.mkdtemp()
    try:
        # write content to a temporary input file inside the scratch dir
        descriptor, path = tempfile.mkstemp(dir=working, suffix='.mpi.fasta')
        with os.fdopen(descriptor, 'w') as tf:
            tf.write(content)
        # run SATe
        cli = [
            'python',
            sate,
            '--input',
            path,
            '--output-directory',
            working,
            '--temporaries',
            working,
            cfg
        ]
        proc = subprocess.Popen(
            cli,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE
        )
        # communicate() returns (stdout, stderr), in that order
        stdout, stderr = proc.communicate()
        # SATe names its alignment after the input file minus its last extension
        aln_name = "satejob.marker001.{0}.aln".format(
            os.path.splitext(os.path.basename(path))[0]
        )
        aln_file = os.path.join(working, aln_name)
        if not os.path.isfile(aln_file):
            # surface SATe's own diagnostics instead of a bare missing-file error
            raise IOError(
                "SATe produced no alignment for {0} (exit status {1}): {2}".format(
                    f,
                    proc.returncode,
                    stderr.decode('utf-8', 'replace').strip()
                )
            )
        # plain 'r' is used because the 'U' mode flag raises ValueError on
        # Python 3.11+; Python 3 text mode already translates newlines
        with open(aln_file, 'r') as handle:
            aln = handle.read()
    finally:
        # zap the scratch dir even when SATe or the file handling failed
        shutil.rmtree(working)
    # return filename and alignment so the caller can store results reasonably
    return (f, aln)
def main():
    """Align every ``*.fasta`` file in the input directory and save the results.

    The contents of each matching file are read up front and bundled with
    the SATe script path and config path into one work item per file.  The
    work items are handed to ``mpimap.map`` together with ``worker``
    (presumably spread across MPI ranks -- see the mpi4py_map project), and
    each returned alignment is written to the output directory as
    ``<input basename without extension>.aln``.
    """
    args = get_args()
    # read each file's contents into a work item that will be passed to sate
    file_contents = []
    for f in glob.glob(os.path.join(args.input, '*.fasta')):
        # plain 'r' is used because the 'U' mode flag raises ValueError on
        # Python 3.11+; the with-block closes the handle deterministically
        with open(f, 'r') as handle:
            file_contents.append([args.sate, args.cfg, f, handle.read()])
    # pass to map or Pool.map or mpimap
    alignments = mpimap.map(worker, file_contents)
    for f, aln in alignments:
        outname = os.path.splitext(os.path.basename(f))[0]
        out_file = os.path.join(args.output, outname) + '.aln'
        with open(out_file, 'w') as out:
            out.write(aln)
# Run the pipeline only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
@brantfaircloth
Copy link
Author

Note that I'm importing mpimap, which is just a very slightly modified fork of https://github.com/twiecki/mpi4py_map to fix the setup.py and change the module name.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment