Created
June 4, 2014 06:06
-
-
Save cfriedline/57c9f35b31225ced7931 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:24625e0326d219f8b77f0317e421df165d0adfb7925fac44e4f9cc5c35a28bd6" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import os\n", | |
"os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n", | |
"import rpy2.robjects\n", | |
"import random\n", | |
"import string\n", | |
"import tempfile\n", | |
"import dendropy\n", | |
"from multiprocessing import Pool\n", | |
"from IPython.parallel import Client" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"rc = Client(profile='sge')\n", | |
"dview = rc[:]\n", | |
"lview = rc.load_balanced_view()\n", | |
"len(rc)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"10" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"with dview.sync_imports():\n", | |
" import os\n", | |
" import rpy2\n", | |
" import rpy2.robjects\n", | |
" import random\n", | |
" import string\n", | |
" import tempfile\n", | |
" import dendropy\n", | |
" import socket\n", | |
" from multiprocessing import Pool\n", | |
" \n", | |
"def setup_cluster_engines():\n", | |
" os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n", | |
" r = rpy2.robjects.r\n", | |
" ape = r('library(ape)')\n", | |
" return socket.gethostname(), os.getpid(), os.environ['R_HOME'], rpy2.__version__\n", | |
"dview['setup_cluster_engines'] = setup_cluster_engines\n", | |
"dview.apply(setup_cluster_engines).get()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"importing os on engine(s)\n", | |
"importing rpy2 on engine(s)\n", | |
"importing rpy2.robjects on engine(s)\n", | |
"importing random on engine(s)\n", | |
"importing string on engine(s)" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"importing tempfile on engine(s)\n", | |
"importing dendropy on engine(s)\n", | |
"importing socket on engine(s)\n", | |
"importing Pool from multiprocessing on engine(s)" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"[('godel97', 26811, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 26870, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 26819, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 26866, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 26873, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 17510, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 17464, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 17512, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 17511, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 17507, '/home/cfriedline/lib64/R', '2.4.0')]" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"r = rpy2.robjects.r\n", | |
"ape = r('library(ape)')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def create_tree(num_tips, type):\n", | |
" \"\"\"\n", | |
" creates the taxa tree in R\n", | |
" @param num_tips: number of taxa to create\n", | |
" @param type: type for naming (e.g., 'taxa')\n", | |
" @return: a dendropy Tree\n", | |
" @rtype: dendropy.Tree\n", | |
" \"\"\"\n", | |
" r = rpy2.robjects.r\n", | |
" rpy2.robjects.globalenv['numtips'] = num_tips\n", | |
" rpy2.robjects.globalenv['treetype'] = type\n", | |
" name = _get_random_string(20)\n", | |
" if type == \"T\":\n", | |
" r(\"%s = rtree(numtips, rooted=T, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n", | |
" else:\n", | |
" r(\"%s = rtree(numtips, rooted=F, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n", | |
" tree = r[name]\n", | |
" return ape_to_dendropy(tree)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def ape_to_dendropy(phylo):\n", | |
" \"\"\"\n", | |
" converts an ape tree to dendropy tree\n", | |
" @param phylo: ape instance from rpy2\n", | |
" @return: a dendropy tree\n", | |
" @rtype: dendropy.Tree\n", | |
" \"\"\"\n", | |
" f = tempfile.NamedTemporaryFile()\n", | |
" rpy2.robjects.r['write.nexus'](phylo, file=f.name)\n", | |
" tree = dendropy.Tree.get_from_path(f.name, \"nexus\")\n", | |
" f.close()\n", | |
" return tree" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def _get_random_string(length):\n", | |
" \"\"\"\n", | |
" gets a random string of letters/numbers, ensuring that it does not start with a\n", | |
" number\n", | |
" @param length: length of the string\n", | |
" @return: the random string\n", | |
" @rtype: string\n", | |
" \"\"\"\n", | |
" s = ''.join(random.choice(string.letters + string.digits) for i in xrange(length))\n", | |
" if not s[0] in string.letters:\n", | |
" return _get_random_string(length)\n", | |
" return s" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"tree = create_tree(100, \"T\")\n", | |
"tree.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"'(((T6:0.3194573249,T88:0.7137461954):0.426507879,T42:0.09966852632):0.2807099563'" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def create_tree_mp(num_taxa):\n", | |
" t = create_tree(num_taxa, \"T\")\n", | |
" return t\n", | |
"\n", | |
"def get_taxa_trees(num_trees, num_taxa):\n", | |
" jobs_mp = []\n", | |
" jobs_ip = []\n", | |
" jobs = []\n", | |
" res = []\n", | |
" pool = Pool(num_trees)\n", | |
" for i in xrange(num_trees):\n", | |
" jobs_mp.append(pool.apply_async(create_tree_mp, (num_taxa,)))\n", | |
" jobs.append(create_tree_mp(num_taxa))\n", | |
" jobs_ip.append(lview.apply_async(create_tree_mp, num_taxa))\n", | |
" pool.close()\n", | |
" pool.join()\n", | |
" res.append(jobs)\n", | |
" res.append([x.get() for x in jobs_mp])\n", | |
" res.append([x.get() for x in jobs_ip])\n", | |
" return res" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"dview['create_tree'] = create_tree\n", | |
"dview['_get_random_string'] = _get_random_string\n", | |
"dview['ape_to_dendropy'] = ape_to_dendropy\n", | |
"dview['create_tree_mp'] = create_tree_mp\n", | |
"dview['get_taxa_trees'] = get_taxa_trees" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"trees = get_taxa_trees(5, 10)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[0]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n", | |
"((T7:0.5806423163,(T5:0.007633442292,T2:0.2177157113):0.09601430735):0.305033218\n", | |
"((((((T6:0.2959137154,T7:0.4578026251):0.8627177938,(T8:0.08809919422,T3:0.10175\n", | |
"(T7:0.2577645453,((T10:0.1396945077,T4:0.6536828352):0.7271465769,(T2:0.26139505\n", | |
"((T10:0.01782502211,T5:0.9478464343):0.4384911573,((T1:0.201613334,T9:0.52947913\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[1]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n", | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n", | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n", | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n", | |
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[2]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(((T1:0.1959578383,(T4:0.4092330183,T5:0.1314637179):0.6633579263):0.07092879713\n", | |
"((((T10:0.5492198297,(T9:0.8054336836,T8:0.9056934002):0.6771697227):0.465427928\n", | |
"(((T9:0.06959938002,((T10:0.05167591712,T1:0.7737517059):0.6082972996,T7:0.63657\n", | |
"(T6:0.5223825315,((((T10:0.3305313124,T3:0.1639568484):0.5333597104,(T1:0.155836\n", | |
"((T4:0.9040003123,(T1:0.7009222899,(T9:0.5908546504,T7:0.3244189187):0.872680337\n" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment