Created
September 18, 2012 15:40
-
-
Save mgalardini/3743820 to your computer and use it in GitHub Desktop.
Parallel Evolutionary Biology
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
Launch codeml on all the alignments | |
Marco Galardini 2012 | |
University of Florence | |
GPL v3.0 | |
''' | |
from optparse import OptionParser, OptionGroup | |
from time import strftime | |
import sys | |
import os | |
import Bio.Phylo.PAML | |
from multiprocessing.queues import Queue | |
import multiprocessing | |
import time | |
import shutil | |
class CodeML(object):
    '''
    Class CodeML
    Callable task that runs PAML's codeml (through Biopython) on one alignment
    '''
    def __init__(self, indir, align, tree):
        # indir is stored but not read by __call__ (align is already a path)
        self.indir = indir
        self.align = align
        self.tree = tree

    @staticmethod
    def _ensureDir(path):
        '''Create the directory path, tolerating that it already exists'''
        try:
            os.mkdir(path)
        except OSError:
            # Several workers may race to create the same directory;
            # only a genuine failure (path still missing) is an error
            if not os.path.isdir(path):
                raise

    def __call__(self):
        '''
        Run codeml on self.align with self.tree

        Returns a tuple (align, res): res is the value returned by
        Codeml.run(), or None when the directories could not be created,
        the run raised, or the rst/rst1 files could not be moved
        '''
        from Bio.Phylo.PAML import codeml
        name = os.path.split(self.align)[-1]
        workdir = 'paml/%s' % name
        try:
            self._ensureDir('paml')
            self._ensureDir(workdir)
            # Output, control and rst files are all collected here
            self._ensureDir('tmpcodeml')
        except OSError:
            return (self.align, None)
        cml = codeml.Codeml(alignment=self.align, tree=self.tree,
                            out_file="tmpcodeml/%s.out" % name,
                            working_dir=workdir)
        cml.set_options(NSsites="1 2", seqtype=1, model=0, RateAncestor=1)
        # NOTE(review): this path looks relative to working_dir (two levels
        # below the launch directory) -- confirm against Biopython
        cml.ctl_file = "../../tmpcodeml/%s.ctl" % name
        try:
            res = cml.run()
            shutil.move('%s/rst' % workdir, "tmpcodeml/%s.rst" % name)
            shutil.move('%s/rst1' % workdir, "tmpcodeml/%s.rst1" % name)
        except Exception:
            # Any failure marks the alignment as failed; unlike a bare
            # except this lets KeyboardInterrupt/SystemExit propagate
            res = None
        return (self.align, res)
class Consumer(multiprocessing.Process):
    '''
    Class Consumer
    Worker process: takes callables from task_queue, calls them and puts
    each return value on result_queue, until it receives None
    '''
    def __init__(self, task_queue=None, result_queue=None):
        multiprocessing.Process.__init__(self)
        # Queues are created per instance: a Queue() written as a default
        # argument is built once, at definition time, and would be shared
        # by every Consumer constructed without explicit queues
        if task_queue is None:
            task_queue = multiprocessing.Queue()
        if result_queue is None:
            result_queue = multiprocessing.Queue()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        '''Process tasks until the poison pill (None) is received'''
        while True:
            next_task = self.task_queue.get()
            time.sleep(0.01)
            if next_task is None:
                # Poison pill means we should exit
                break
            answer = next_task()
            self.result_queue.put(answer)
        return
class MultiProcess(object):
    '''
    Class MultiProcess
    An object that can perform multiprocesses
    '''
    def __init__(self, ncpus=1):
        self.ncpus = int(ncpus)
        # Parallelization
        self._parallel = None
        # multiprocessing.Queue() binds the default context itself; calling
        # multiprocessing.queues.Queue() directly requires an explicit ctx
        # argument on Python 3 and raises TypeError without it
        self._paralleltasks = multiprocessing.Queue()
        self._parallelresults = multiprocessing.Queue()

    def initiateParallel(self):
        '''Create and start ncpus Consumer workers sharing the two queues'''
        self._parallel = [Consumer(self._paralleltasks, self._parallelresults)
                          for x in range(self.ncpus)]
        for consumer in self._parallel:
            consumer.start()

    def addPoison(self):
        '''Queue one None (poison pill) per worker'''
        for consumer in self._parallel:
            self._paralleltasks.put(None)

    def isTerminated(self):
        '''Return True when no worker is alive any more'''
        for consumer in self._parallel:
            if consumer.is_alive():
                return False
        return True

    def killParallel(self):
        '''Terminate every worker'''
        for consumer in self._parallel:
            consumer.terminate()

    def _collectResults(self, redo, done):
        '''
        Drain the result queue: failures are logged and written to redo,
        successes are logged with the running counter
        Returns the updated success counter
        '''
        while not self._parallelresults.empty():
            result = self._parallelresults.get()
            if not result[1]:
                msg(result[0], 'ERR')
                redo.write('%s\n' % result[0])
            else:
                msg('%s %d' % (result[0], done), 'IMP')
                done += 1
        return done

    def doCodeML(self, indir, tree):
        '''
        Run a CodeML task for every *.phy file found in indir

        Alignments whose task returned a false result are listed in
        codemlfail.txt. Returns dres.
        '''
        i = 0
        # NOTE(review): dres is returned but never filled in -- confirm
        # whether the per-alignment results were meant to be stored here
        dres = {}
        # List the directory before starting workers, so a bad indir does
        # not leave idle processes blocked on the task queue
        names = [f for f in os.listdir(indir) if f[-4:] == '.phy']
        with open('codemlfail.txt', 'w') as redo:
            self.initiateParallel()
            for f in names:
                align = os.path.join(indir, f)
                obj = CodeML(indir, align, tree)
                self._paralleltasks.put(obj)
            # Poison pill to stop the workers
            self.addPoison()
            while True:
                i = self._collectResults(redo, i)
                if self.isTerminated():
                    break
                time.sleep(0.1)
            # Get the last messages
            i = self._collectResults(redo, i)
        self.killParallel()
        return dres
class Highlighter:
    '''
    Class Highlighter
    Wraps a message in the ANSI escape sequence mapped to its level
    '''
    def __init__(self):
        # Level name -> escape sequence written before the message
        self._msgTypes = {'INF': '\033[0m',
                          'IMP': '\033[1;32m',
                          'DEV': '\033[1;34m',
                          'ERR': '\033[1;31m',
                          'WRN': '\033[1;33m'}
        self._reset = '\033[0m'
        self._default = 'INF'

    def ColorMsg(self, msg, msgLevel='INF'):
        '''
        Return msg preceded by the sequence for msgLevel and followed by
        the reset sequence; unknown levels use the default level
        '''
        try:
            prefix = self._msgTypes[msgLevel]
        except (KeyError, TypeError):
            # Unknown (or unhashable) level: fall back to the default,
            # without hiding unrelated errors as a bare except would
            prefix = self._msgTypes[self._default]
        return prefix + msg + self._reset
def msg(message, msgLevel='INF', sameline=False):
    '''
    Write a coloured message to standard error

    With sameline the message follows a carriage return and gets no
    newline; otherwise it follows an HH:MM:SS timestamp and ends the line
    '''
    painter = Highlighter()
    out = sys.stderr
    lead = '\r' if sameline else strftime("%H:%M:%S") + ' '
    out.write(lead)
    out.write(painter.ColorMsg(message, msgLevel))
    if sameline:
        return
    out.write('\n')
def creturn():
    '''Write a single newline to standard error'''
    stream = sys.stderr
    stream.write('\n')
def getOptions():
    '''Parse the command line; returns optparse's (options, args) pair'''
    parser = OptionParser("usage: python parallelPAML.py [options]")
    inputs = OptionGroup(parser, "Inputs")
    # (short flag, long flag, destination, default, help text)
    specs = (
        ('-a', '--aligndir', 'align', 'OUT', 'Alignment directory'),
        ('-t', '--tree', 'tree', 'TREE.nwk', 'Tree file'),
        ('-r', '--threads', 'threads', 1, 'Threads [Default: 1]'),
    )
    for short, full, dest, default, helptext in specs:
        inputs.add_option(short, full, action="store", dest=dest,
                          default=default, help=helptext)
    parser.add_option_group(inputs)
    # Parse the options
    return parser.parse_args()
import json

# The guard keeps worker processes from re-running the whole script when
# multiprocessing re-imports the main module (spawn start method)
if __name__ == '__main__':
    (options, args) = getOptions()
    dres = MultiProcess(options.threads).doCodeML(options.align, options.tree)
    # Context manager so the results file is flushed and closed
    with open('codemlresults.out', 'w') as results:
        json.dump(dres, results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
Launch Phy_ML multi on all the alignments | |
Nearly FAIL-safe | |
Marco Galardini 2012 | |
University of Florence | |
GPL v3.0 | |
''' | |
from optparse import OptionParser, OptionGroup | |
from time import strftime | |
import sys | |
import os | |
from multiprocessing.queues import Queue | |
import multiprocessing | |
import subprocess | |
import time | |
import shutil | |
import logging | |
# Logger named 'PHYML' with a stream handler attached, used for the
# warnings emitted while running phyml_multi
ch = logging.StreamHandler()
logger = logging.getLogger('PHYML')
logger.addHandler(ch)
class PHYML(object):
    '''
    Class PHYML
    Callable task that runs ./phyml_multi on one alignment, retrying once
    with different flags when the output reports a failure
    '''
    def __init__(self, indir, align, tree):
        self.indir = indir
        self.align = align
        self.tree = tree

    def _attempt(self, flags):
        '''
        Run ./phyml_multi once and return True when the run failed

        flags -- the two y/n arguments that differ between the first
                 attempt ('y', 'y') and the retry ('n', 'n')

        A run failed when the program could not be started or when the
        word "failed" appears in its standard output or standard error
        '''
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched
        args = ['./phyml_multi', os.path.join(self.indir, self.align),
                '0', 'i', '1', '0', 'HKY', 'e', 'e', '4', 'e', self.tree]
        args.extend(flags)
        args.extend(['y', '1'])
        try:
            proc = subprocess.Popen(args,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            # Executable missing or not runnable
            return True
        out = proc.communicate()
        # The pipes yield bytes, so the marker must be bytes as well
        return b'failed' in out[0] or b'failed' in out[1]

    def __call__(self):
        '''
        Returns (align, 0) when either attempt succeeded and
        (align, '-666') when both attempts failed
        '''
        log = logging.getLogger('PHYML')
        if self._attempt(('y', 'y')):
            log.warning('%s optimisation failed'%self.align)
            # Failed
            if self._attempt(('n', 'n')):
                log.warning('%s optimisation failed (1)'%self.align)
                return (self.align,'-666')
        # Second step
        # python PartitioningHMM.py Example/simSeqs.phy_phyml_siteLks.txt 0.99
        # Just do it yourself
        return (self.align,0)
class Consumer(multiprocessing.Process):
    '''
    Class Consumer
    Worker process: takes callables from task_queue, calls them and puts
    each return value on result_queue, until it receives None
    '''
    def __init__(self, task_queue=None, result_queue=None):
        multiprocessing.Process.__init__(self)
        # Queues are created per instance: a Queue() written as a default
        # argument is built once, at definition time, and would be shared
        # by every Consumer constructed without explicit queues
        if task_queue is None:
            task_queue = multiprocessing.Queue()
        if result_queue is None:
            result_queue = multiprocessing.Queue()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        '''Process tasks until the poison pill (None) is received'''
        while True:
            next_task = self.task_queue.get()
            time.sleep(0.01)
            if next_task is None:
                # Poison pill means we should exit
                break
            answer = next_task()
            self.result_queue.put(answer)
        return
class MultiProcess(object):
    '''
    Class MultiProcess
    An object that can perform multiprocesses
    '''
    def __init__(self, ncpus=1):
        self.ncpus = int(ncpus)
        # Parallelization
        self._parallel = None
        # multiprocessing.Queue() binds the default context itself; calling
        # multiprocessing.queues.Queue() directly requires an explicit ctx
        # argument on Python 3 and raises TypeError without it
        self._paralleltasks = multiprocessing.Queue()
        self._parallelresults = multiprocessing.Queue()

    def initiateParallel(self):
        '''Create and start ncpus Consumer workers sharing the two queues'''
        self._parallel = [Consumer(self._paralleltasks, self._parallelresults)
                          for x in range(self.ncpus)]
        for consumer in self._parallel:
            consumer.start()

    def addPoison(self):
        '''Queue one None (poison pill) per worker'''
        for consumer in self._parallel:
            self._paralleltasks.put(None)

    def isTerminated(self):
        '''Return True when no worker is alive any more'''
        for consumer in self._parallel:
            if consumer.is_alive():
                return False
        return True

    def killParallel(self):
        '''Terminate every worker'''
        for consumer in self._parallel:
            consumer.terminate()

    def _collectResults(self, redo, done):
        '''
        Drain the result queue: results whose status is not 0 are logged
        and written to redo, the others are logged with the running counter
        Returns the updated success counter
        '''
        while not self._parallelresults.empty():
            result = self._parallelresults.get()
            if result[1] != 0:
                msg(result[0], 'ERR')
                redo.write('%s\n' % result[0])
            else:
                msg('%s %d' % (result[0], done), 'IMP')
                done += 1
        return done

    def doPHYML(self, indir, tree):
        '''
        Run a PHYML task for every *.phy file found in indir

        Alignments whose task returned a non-zero status are listed in
        phymlfail.txt. Returns dres.
        '''
        i = 0
        # NOTE(review): dres is returned but never filled in -- confirm
        # whether the per-alignment results were meant to be stored here
        dres = {}
        # List the directory before starting workers, so a bad indir does
        # not leave idle processes blocked on the task queue
        names = [f for f in os.listdir(indir) if f.endswith('.phy')]
        with open('phymlfail.txt', 'w') as redo:
            self.initiateParallel()
            for align in names:
                obj = PHYML(indir, align, tree)
                self._paralleltasks.put(obj)
            # Poison pill to stop the workers
            self.addPoison()
            while True:
                i = self._collectResults(redo, i)
                if self.isTerminated():
                    break
                time.sleep(0.1)
            # Get the last messages
            i = self._collectResults(redo, i)
        self.killParallel()
        return dres
class Highlighter:
    '''
    Class Highlighter
    Wraps a message in the ANSI escape sequence mapped to its level
    '''
    def __init__(self):
        # Level name -> escape sequence written before the message
        self._msgTypes = {'INF': '\033[0m',
                          'IMP': '\033[1;32m',
                          'DEV': '\033[1;34m',
                          'ERR': '\033[1;31m',
                          'WRN': '\033[1;33m'}
        self._reset = '\033[0m'
        self._default = 'INF'

    def ColorMsg(self, msg, msgLevel='INF'):
        '''
        Return msg preceded by the sequence for msgLevel and followed by
        the reset sequence; unknown levels use the default level
        '''
        try:
            prefix = self._msgTypes[msgLevel]
        except (KeyError, TypeError):
            # Unknown (or unhashable) level: fall back to the default,
            # without hiding unrelated errors as a bare except would
            prefix = self._msgTypes[self._default]
        return prefix + msg + self._reset
def msg(message, msgLevel='INF', sameline=False):
    '''
    Write a coloured message to standard error

    With sameline the message follows a carriage return and gets no
    newline; otherwise it follows an HH:MM:SS timestamp and ends the line
    '''
    painter = Highlighter()
    out = sys.stderr
    lead = '\r' if sameline else strftime("%H:%M:%S") + ' '
    out.write(lead)
    out.write(painter.ColorMsg(message, msgLevel))
    if sameline:
        return
    out.write('\n')
def creturn():
    '''Write a single newline to standard error'''
    stream = sys.stderr
    stream.write('\n')
def getOptions():
    '''Parse the command line; returns optparse's (options, args) pair'''
    parser = OptionParser("usage: python parallelPHYML.py [options]")
    inputs = OptionGroup(parser, "Inputs")
    # (short flag, long flag, destination, default, help text)
    specs = (
        ('-a', '--aligndir', 'align', 'OUT', 'Alignment directory'),
        ('-t', '--tree', 'tree', 'TREE.nwk', 'Tree file'),
        ('-r', '--threads', 'threads', 1, 'Threads [Default: 1]'),
    )
    for short, full, dest, default, helptext in specs:
        inputs.add_option(short, full, action="store", dest=dest,
                          default=default, help=helptext)
    parser.add_option_group(inputs)
    # Parse the options
    return parser.parse_args()
# The guard keeps worker processes from re-running the whole script when
# multiprocessing re-imports the main module (spawn start method)
if __name__ == '__main__':
    (options, args) = getOptions()
    dres = MultiProcess(options.threads).doPHYML(options.align, options.tree)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
Launch SLR on all the alignments | |
Nearly FAIL-safe | |
Marco Galardini 2012 | |
University of Florence | |
GPL v3.0 | |
''' | |
from optparse import OptionParser, OptionGroup | |
from time import strftime | |
import sys | |
import os | |
from multiprocessing.queues import Queue | |
import multiprocessing | |
import threading | |
import subprocess | |
import time | |
import shutil | |
import logging | |
# Logger named 'SLR' with a stream handler attached, used for the
# warnings emitted while running (and killing) Slr
ch = logging.StreamHandler()
logger = logging.getLogger('SLR')
logger.addHandler(ch)
class SLR(object):
    '''
    Class SLR
    Callable task that runs ./Slr on one alignment under a time limit,
    retrying once with "skipsitewise: 1" when the first run is too slow
    '''
    # Seconds a single Slr run may take before it is killed
    timeout = 400.0

    def __init__(self, indir, align, tree):
        # indir is stored but not read: align is used as given
        self.indir = indir
        self.align = align
        self.tree = tree
        self.cmd = None
        self.process = None
        # Stays at -1000 until a run has actually finished
        self.return_code = -1000

    def _writeControl(self, fname, skipsitewise):
        '''Write the Slr control file fname for this alignment and tree'''
        with open(fname, 'w') as f:
            f.write('seqfile: %s\ntreefile: %s\n'%(self.align, self.tree))
            f.write('outfile: %s.res\npositive_only: 0\nfreqtype: 1'%self.align)
            if skipsitewise:
                f.write('\nskipsitewise: 1')

    def _target(self):
        '''Thread body: run self.cmd to completion, record its exit status'''
        try:
            proc = subprocess.Popen(self.cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            # Slr could not be started: return_code keeps its old value
            return
        self.process = proc
        proc.communicate()
        self.return_code = proc.returncode

    def _attempt(self, fname, skipsitewise, notice, tag):
        '''
        Write the control file and run Slr; return True when the run
        ended within self.timeout seconds, False when it had to be killed

        notice and tag only select the wording of the warnings
        '''
        self._writeControl(fname, skipsitewise)
        # Argument list, no shell: the child IS Slr, so it can be killed
        # through its own handle instead of grepping the process table
        self.cmd = ['./Slr', fname]
        self.process = None
        thread = threading.Thread(target=self._target)
        thread.start()
        thread.join(self.timeout)
        if not thread.is_alive():
            return True
        log = logging.getLogger('SLR')
        log.warning('%s %s'%(notice, self.align))
        proc = self.process
        if proc is not None:
            try:
                proc.kill()
            except OSError:
                # The process ended on its own in the meantime
                pass
        log.warning('Killed %s (1%s)'%(self.align, tag))
        thread.join(1)
        log.warning('Killed %s (2%s)'%(self.align, tag))
        return False

    def __call__(self):
        '''
        Returns (align, return_code) when a run finished in time and
        (align, -666) when both runs had to be killed
        '''
        fname = self.align + '.ctl'
        if self._attempt(fname, False, 'Killing', ''):
            return (self.align,self.return_code)
        if self._attempt(fname, True, 'Giving up on', 'a'):
            return (self.align,self.return_code)
        return (self.align,-666)
class Consumer(multiprocessing.Process):
    '''
    Class Consumer
    Worker process: takes callables from task_queue, calls them and puts
    each return value on result_queue, until it receives None
    '''
    def __init__(self, task_queue=None, result_queue=None):
        multiprocessing.Process.__init__(self)
        # Queues are created per instance: a Queue() written as a default
        # argument is built once, at definition time, and would be shared
        # by every Consumer constructed without explicit queues
        if task_queue is None:
            task_queue = multiprocessing.Queue()
        if result_queue is None:
            result_queue = multiprocessing.Queue()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        '''Process tasks until the poison pill (None) is received'''
        while True:
            next_task = self.task_queue.get()
            time.sleep(0.01)
            if next_task is None:
                # Poison pill means we should exit
                break
            answer = next_task()
            self.result_queue.put(answer)
        return
class MultiProcess(object):
    '''
    Class MultiProcess
    An object that can perform multiprocesses
    '''
    def __init__(self, ncpus=1):
        self.ncpus = int(ncpus)
        # Parallelization
        self._parallel = None
        # multiprocessing.Queue() binds the default context itself; calling
        # multiprocessing.queues.Queue() directly requires an explicit ctx
        # argument on Python 3 and raises TypeError without it
        self._paralleltasks = multiprocessing.Queue()
        self._parallelresults = multiprocessing.Queue()

    def initiateParallel(self):
        '''Create and start ncpus Consumer workers sharing the two queues'''
        self._parallel = [Consumer(self._paralleltasks, self._parallelresults)
                          for x in range(self.ncpus)]
        for consumer in self._parallel:
            consumer.start()

    def addPoison(self):
        '''Queue one None (poison pill) per worker'''
        for consumer in self._parallel:
            self._paralleltasks.put(None)

    def isTerminated(self):
        '''Return True when no worker is alive any more'''
        for consumer in self._parallel:
            if consumer.is_alive():
                return False
        return True

    def killParallel(self):
        '''Terminate every worker'''
        for consumer in self._parallel:
            consumer.terminate()

    def _collectResults(self, redo, done):
        '''
        Drain the result queue: results whose status is not 0 are logged
        and written to redo, the others are logged with the running counter
        Returns the updated success counter
        '''
        while not self._parallelresults.empty():
            result = self._parallelresults.get()
            if result[1] != 0:
                msg(result[0], 'ERR')
                redo.write('%s\n' % result[0])
            else:
                msg('%s %d' % (result[0], done), 'IMP')
                done += 1
        return done

    def doSLR(self, indir, tree):
        '''
        Run an SLR task for every *.phy file found in indir

        Alignments whose task returned a non-zero status are listed in
        slrfail.txt. Returns dres.
        '''
        i = 0
        # NOTE(review): dres is returned but never filled in -- confirm
        # whether the per-alignment results were meant to be stored here
        dres = {}
        # List the directory before starting workers, so a bad indir does
        # not leave idle processes blocked on the task queue
        names = [f for f in os.listdir(indir) if f.endswith('.phy')]
        with open('slrfail.txt', 'w') as redo:
            self.initiateParallel()
            for align in names:
                obj = SLR(indir, align, tree)
                self._paralleltasks.put(obj)
            # Poison pill to stop the workers
            self.addPoison()
            while True:
                i = self._collectResults(redo, i)
                if self.isTerminated():
                    break
                time.sleep(0.1)
            # Get the last messages
            i = self._collectResults(redo, i)
        self.killParallel()
        return dres
class Highlighter:
    '''
    Class Highlighter
    Wraps a message in the ANSI escape sequence mapped to its level
    '''
    def __init__(self):
        # Level name -> escape sequence written before the message
        self._msgTypes = {'INF': '\033[0m',
                          'IMP': '\033[1;32m',
                          'DEV': '\033[1;34m',
                          'ERR': '\033[1;31m',
                          'WRN': '\033[1;33m'}
        self._reset = '\033[0m'
        self._default = 'INF'

    def ColorMsg(self, msg, msgLevel='INF'):
        '''
        Return msg preceded by the sequence for msgLevel and followed by
        the reset sequence; unknown levels use the default level
        '''
        try:
            prefix = self._msgTypes[msgLevel]
        except (KeyError, TypeError):
            # Unknown (or unhashable) level: fall back to the default,
            # without hiding unrelated errors as a bare except would
            prefix = self._msgTypes[self._default]
        return prefix + msg + self._reset
def msg(message, msgLevel='INF', sameline=False):
    '''
    Write a coloured message to standard error

    With sameline the message follows a carriage return and gets no
    newline; otherwise it follows an HH:MM:SS timestamp and ends the line
    '''
    painter = Highlighter()
    out = sys.stderr
    lead = '\r' if sameline else strftime("%H:%M:%S") + ' '
    out.write(lead)
    out.write(painter.ColorMsg(message, msgLevel))
    if sameline:
        return
    out.write('\n')
def creturn():
    '''Write a single newline to standard error'''
    stream = sys.stderr
    stream.write('\n')
def getOptions():
    '''Parse the command line; returns optparse's (options, args) pair'''
    parser = OptionParser("usage: python parallelSLR.py [options]")
    inputs = OptionGroup(parser, "Inputs")
    # (short flag, long flag, destination, default, help text)
    specs = (
        ('-t', '--tree', 'tree', 'TREE.nwk', 'Tree file'),
        ('-r', '--threads', 'threads', 1, 'Threads [Default: 1]'),
    )
    for short, full, dest, default, helptext in specs:
        inputs.add_option(short, full, action="store", dest=dest,
                          default=default, help=helptext)
    parser.add_option_group(inputs)
    # Parse the options
    return parser.parse_args()
# The guard keeps worker processes from re-running the whole script when
# multiprocessing re-imports the main module (spawn start method)
if __name__ == '__main__':
    (options, args) = getOptions()
    dres = MultiProcess(options.threads).doSLR('.', options.tree)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment