Created
June 4, 2014 07:30
-
-
Save cfriedline/0095ad55d645a7202cc6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:5507a20bf985d7ff9c8b454bf414997a33b671d82b7ceaaf24f89a61d1397ce0" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import os\n", | |
"os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n", | |
"import rpy2.robjects\n", | |
"import random\n", | |
"import string\n", | |
"import tempfile\n", | |
"import dendropy\n", | |
"import scipy\n", | |
"from multiprocessing import Pool\n", | |
"from IPython.parallel import Client" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"rc = Client(profile='sge')\n", | |
"dview = rc[:]\n", | |
"lview = rc.load_balanced_view()\n", | |
"len(rc)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"10" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"with dview.sync_imports():\n", | |
" import os\n", | |
" import rpy2\n", | |
" import rpy2.robjects\n", | |
" import random\n", | |
" import string\n", | |
" import tempfile\n", | |
" import dendropy\n", | |
" import socket\n", | |
" import scipy\n", | |
" from multiprocessing import Pool\n", | |
" \n", | |
"def setup_cluster_engines():\n", | |
" os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n", | |
" r = rpy2.robjects.r\n", | |
" ape = r('library(ape)')\n", | |
" return socket.gethostname(), os.getpid(), os.environ['R_HOME'], rpy2.__version__\n", | |
"dview['setup_cluster_engines'] = setup_cluster_engines\n", | |
"dview.apply(setup_cluster_engines).get()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"importing os on engine(s)\n", | |
"importing rpy2 on engine(s)\n", | |
"importing rpy2.robjects on engine(s)\n", | |
"importing random on engine(s)\n", | |
"importing string on engine(s)\n", | |
"importing tempfile on engine(s)" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"importing dendropy on engine(s)\n", | |
"importing socket on engine(s)\n", | |
"importing scipy on engine(s)\n", | |
"importing Pool from multiprocessing on engine(s)\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"[('godel199', 17947, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 28753, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 28756, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 28781, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 28779, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel97', 28752, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 18050, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 18095, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 18083, '/home/cfriedline/lib64/R', '2.4.0'),\n", | |
" ('godel199', 18077, '/home/cfriedline/lib64/R', '2.4.0')]" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"r = rpy2.robjects.r\n", | |
"ape = r('library(ape)')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def create_tree(num_tips, type):\n", | |
" \"\"\"\n", | |
" creates the taxa tree in R\n", | |
" @param num_tips: number of taxa to create\n", | |
" @param type: type for naming (e.g., 'taxa')\n", | |
" @return: a dendropy Tree\n", | |
" @rtype: dendropy.Tree\n", | |
" \"\"\"\n", | |
" r = rpy2.robjects.r\n", | |
" rpy2.robjects.globalenv['numtips'] = num_tips\n", | |
" rpy2.robjects.globalenv['treetype'] = type\n", | |
" name = _get_random_string(20)\n", | |
" if type == \"T\":\n", | |
" r(\"%s = rtree(numtips, rooted=T, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n", | |
" else:\n", | |
" r(\"%s = rtree(numtips, rooted=F, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n", | |
" tree = r[name]\n", | |
" return ape_to_dendropy(tree)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def ape_to_dendropy(phylo):\n", | |
" \"\"\"\n", | |
" converts an ape tree to dendropy tree\n", | |
" @param phylo: ape instance from rpy2\n", | |
" @return: a dendropy tree\n", | |
" @rtype: dendropy.Tree\n", | |
" \"\"\"\n", | |
" f = tempfile.NamedTemporaryFile()\n", | |
" rpy2.robjects.r['write.nexus'](phylo, file=f.name)\n", | |
" tree = dendropy.Tree.get_from_path(f.name, \"nexus\")\n", | |
" f.close()\n", | |
" return tree" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def _get_random_string(length):\n", | |
" \"\"\"\n", | |
" gets a random string of letters/numbers, ensuring that it does not start with a\n", | |
" number\n", | |
" @param length: length of the string\n", | |
" @return: the random string\n", | |
" @rtype: string\n", | |
" \"\"\"\n", | |
" choices = \"%s%s\" % (string.letters,string.digits)\n", | |
" s = ''.join(scipy.random.choice(list(choices),10))\n", | |
" if s[0] not in string.letters:\n", | |
" return _get_random_string(length)\n", | |
" return s" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"tree = create_tree(100, \"T\")\n", | |
"tree.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"'((((T88:0.2252453833,T52:0.4084565411):0.2996090709,((((T68:0.1808569834,T67:0.2'" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def reseed(args):\n", | |
" scipy.random.seed()\n", | |
" return os.getpid()\n", | |
"\n", | |
"def pool_reseed(pool, jobs):\n", | |
" res = pool.map(reseed, range(jobs))\n", | |
" if len(set(res)) != jobs:\n", | |
" return pool_reseed(pool, jobs)\n", | |
" return True, res" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def create_tree_mp(num_taxa):\n", | |
" t = create_tree(num_taxa, \"T\")\n", | |
" return t\n", | |
"\n", | |
"def get_taxa_trees(num_trees, num_taxa):\n", | |
" jobs_mp = []\n", | |
" jobs_ip = []\n", | |
" jobs = []\n", | |
" res = []\n", | |
" pool = Pool(num_trees)\n", | |
" print pool_reseed(pool, num_trees)\n", | |
" for i in xrange(num_trees):\n", | |
" jobs_mp.append(pool.apply_async(create_tree_mp, (num_taxa,)))\n", | |
" jobs.append(create_tree_mp(num_taxa))\n", | |
" jobs_ip.append(lview.apply_async(create_tree_mp, num_taxa))\n", | |
" pool.close()\n", | |
" pool.join()\n", | |
" res.append(jobs)\n", | |
" res.append([x.get() for x in jobs_mp])\n", | |
" res.append([x.get() for x in jobs_ip])\n", | |
" return res" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"dview['create_tree'] = create_tree\n", | |
"dview['_get_random_string'] = _get_random_string\n", | |
"dview['ape_to_dendropy'] = ape_to_dendropy\n", | |
"dview['create_tree_mp'] = create_tree_mp\n", | |
"dview['get_taxa_trees'] = get_taxa_trees" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# jobs = 10\n", | |
"# pool = Pool(jobs)\n", | |
"# pool_reseed(pool, jobs)\n", | |
"# if pool_reseed:\n", | |
"# for i in xrange(10):\n", | |
"# print pool.apply_async(_get_random_string, (20,)).get()\n", | |
"# pool.close()\n", | |
"# pool.join()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"trees = get_taxa_trees(5, 10)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(True, [3499, 3500, 3501, 3503, 3502])\n" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[0]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n", | |
"(((T7:0.1649202022,(((T6:0.432419851,T1:0.670651369):0.9676914262,(T9:0.25170810\n", | |
"(((T9:0.1202223536,((T10:0.1974260821,T2:0.9239173683):0.3418610748,T8:0.3907029\n", | |
"(((T6:0.3341352192,T8:0.7529280938):0.5023088937,(((T1:0.2147031985,T10:0.728587\n", | |
"(T5:0.870002911,(((T10:0.2627973619,T7:0.5444249122):0.1685904923,T3:0.799313113\n" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[1]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n", | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n", | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n", | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n", | |
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n" | |
] | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for t in trees[2]:\n", | |
" print t.as_newick_string()[0:80]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(((T10:0.5095300437,T2:0.8708823652):0.1151310115,T8:0.6142042589):0.9769569514,\n", | |
"(((T5:0.1514588273,((T1:0.4826061821,T7:0.1636368397):0.9322419968,(T6:0.8749330\n", | |
"((T10:0.9323699088,(T1:0.2327637093,(((T2:0.8742392994,T4:0.8461208662):0.681684\n", | |
"(((T4:0.946286211,T8:0.6548262595):0.8693261528,(T5:0.911844848,T1:0.6759150678)\n", | |
"((((T2:0.7475027808,T10:0.4808094592):0.7282650121,(T9:0.5261673003,T5:0.9762509\n" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment