Last active
September 30, 2016 06:25
-
-
Save lczech/24c12a912e4eb0ed38d517d887c42db5 to your computer and use it in GitHub Desktop.
Write all placements of a jplace file into a list, including the closted leaf name of the tree, using genesis v0.11.0 (https://github.com/lczech/genesis).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Genesis - A toolkit for working with phylogenetic data. | |
Copyright (C) 2014-2016 Lucas Czech | |
This program is free software: you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation, either version 3 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program. If not, see <http://www.gnu.org/licenses/>. | |
Contact: | |
Lucas Czech <[email protected]> | |
Exelixis Lab, Heidelberg Institute for Theoretical Studies | |
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany | |
*/ | |
#include "genesis.hpp" | |
#include <algorithm> | |
#include <iostream> | |
#include <fstream> | |
using namespace genesis; | |
int main( int argc, const char* argv[] ) | |
{ | |
using namespace ::genesis::placement; | |
// How many placements per Pquery shall be printed? If set to 0, all of them. If set to a | |
// value >0, only this many placements are printed (sorted by like_weight_ratio). | |
size_t n_max_placements = 0; | |
// Activate logging. | |
utils::Logging::log_to_stdout(); | |
// Check if the command line contains the right number of arguments. | |
if (argc != 3) { | |
throw std::runtime_error( | |
"Need to provide an input jplace file path and a table output file path as command line argument." | |
); | |
} | |
auto jplace_filename = std::string( argv[1] ); | |
auto output_filename = std::string( argv[2] ); | |
// Some user output. | |
LOG_INFO << "Using jplace file " << jplace_filename; | |
LOG_INFO << "Using output file " << output_filename; | |
// Read the jplace file into a Sample. Sort by like weight ratio. | |
Sample smp; | |
JplaceReader().from_file( jplace_filename, smp ); | |
sort_placements_by_weight( smp ); | |
// More user output. | |
LOG_INFO << "Found " << smp.pquery_size() << " placements."; | |
// Get a list that contains the closest leaf node for all nodes of the tree. | |
auto closest_leaf_nodes = tree::closest_leaf_depth_vector( smp.tree() ); | |
// Get the edpl values for all pqueries of the sample. | |
auto edpl_vec = edpl( smp ); | |
// Open the output file for writing. | |
std::ofstream outfile; | |
outfile.open( output_filename ); | |
if( ! outfile ) { | |
throw std::runtime_error( "Cannot open output file " + output_filename ); | |
} | |
// Header line. | |
outfile << "name\trank\tmultiplicity\tedge_num\tlike_weight_ratio\tlikelihood\t"; | |
outfile << "proximal_length\tdistal_length\tpendant_length\tedpl\tclosest_leaf\tdepth\n"; | |
// Loop over the Pqueries of the Sample. | |
for( size_t i = 0; i < smp.pquery_size(); ++i ) { | |
auto const& pquery = smp.pquery_at(i); | |
// Use the first name of the pquery if possible; use default name otherwise. | |
std::string name = "Unnamed Pquery"; | |
double multiplicity = 1.0; | |
if( pquery.name_size() > 0 ) { | |
name = pquery.name_at( 0 ).name; | |
multiplicity = pquery.name_at( 0 ).multiplicity; | |
} | |
// Output all placement positions of this Pquery. | |
size_t count = 1; | |
for( auto const& placement : pquery.placements() ) { | |
// Output the name of the pquery. | |
outfile << name; | |
// Output a counter/rank (the nth most probably placement position of this Pquery). | |
outfile << "\t" << count; | |
// Calc distal length from branch length and proximal length. | |
auto distal_len = placement.edge().data<tree::DefaultEdgeData>().branch_length | |
- placement.proximal_length; | |
// Output the properties. | |
outfile << "\t" << multiplicity; | |
outfile << "\t" << placement.edge().index(); | |
outfile << "\t" << placement.like_weight_ratio; | |
outfile << "\t" << placement.likelihood; | |
outfile << "\t" << placement.proximal_length; | |
outfile << "\t" << distal_len; | |
outfile << "\t" << placement.pendant_length; | |
outfile << "\t" << edpl_vec.at(i); | |
// Get both nodes at the end of the placement's edge. | |
// Both are then a pair containing the node as first element and the distance to it | |
// as the second element. | |
auto leaf_p = closest_leaf_nodes[ placement.edge().primary_node().index() ]; | |
auto leaf_s = closest_leaf_nodes[ placement.edge().secondary_node().index() ]; | |
// Check which node at the end of the edge is closer to a leaf. | |
// Use the name of the closer leaf and output its name and the distance to the placement. | |
if( leaf_p.second < leaf_p.second ) { | |
outfile << "\t" << leaf_p.first->data<tree::DefaultNodeData>().name; | |
outfile << "\t" << leaf_p.second; | |
} else { | |
outfile << "\t" << leaf_s.first->data<tree::DefaultNodeData>().name; | |
outfile << "\t" << leaf_s.second; | |
} | |
outfile << "\n"; | |
// Exit condition. We printed n many placments for this pquery. | |
++count; | |
if( n_max_placements > 0 && count > n_max_placements ) { | |
break; | |
} | |
} | |
} | |
// Finish. | |
outfile.close(); | |
LOG_INFO << "Finished."; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment