Skip to content

Instantly share code, notes, and snippets.

@lczech
Last active September 30, 2016 06:25
Show Gist options
  • Save lczech/24c12a912e4eb0ed38d517d887c42db5 to your computer and use it in GitHub Desktop.
Save lczech/24c12a912e4eb0ed38d517d887c42db5 to your computer and use it in GitHub Desktop.
Write all placements of a jplace file into a list, including the closted leaf name of the tree, using genesis v0.11.0 (https://github.com/lczech/genesis).
/*
Genesis - A toolkit for working with phylogenetic data.
Copyright (C) 2014-2016 Lucas Czech
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact:
Lucas Czech <[email protected]>
Exelixis Lab, Heidelberg Institute for Theoretical Studies
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#include "genesis.hpp"
#include <algorithm>
#include <iostream>
#include <fstream>
using namespace genesis;
int main( int argc, const char* argv[] )
{
using namespace ::genesis::placement;
// How many placements per Pquery shall be printed? If set to 0, all of them. If set to a
// value >0, only this many placements are printed (sorted by like_weight_ratio).
size_t n_max_placements = 0;
// Activate logging.
utils::Logging::log_to_stdout();
// Check if the command line contains the right number of arguments.
if (argc != 3) {
throw std::runtime_error(
"Need to provide an input jplace file path and a table output file path as command line argument."
);
}
auto jplace_filename = std::string( argv[1] );
auto output_filename = std::string( argv[2] );
// Some user output.
LOG_INFO << "Using jplace file " << jplace_filename;
LOG_INFO << "Using output file " << output_filename;
// Read the jplace file into a Sample. Sort by like weight ratio.
Sample smp;
JplaceReader().from_file( jplace_filename, smp );
sort_placements_by_weight( smp );
// More user output.
LOG_INFO << "Found " << smp.pquery_size() << " placements.";
// Get a list that contains the closest leaf node for all nodes of the tree.
auto closest_leaf_nodes = tree::closest_leaf_depth_vector( smp.tree() );
// Get the edpl values for all pqueries of the sample.
auto edpl_vec = edpl( smp );
// Open the output file for writing.
std::ofstream outfile;
outfile.open( output_filename );
if( ! outfile ) {
throw std::runtime_error( "Cannot open output file " + output_filename );
}
// Header line.
outfile << "name\trank\tmultiplicity\tedge_num\tlike_weight_ratio\tlikelihood\t";
outfile << "proximal_length\tdistal_length\tpendant_length\tedpl\tclosest_leaf\tdepth\n";
// Loop over the Pqueries of the Sample.
for( size_t i = 0; i < smp.pquery_size(); ++i ) {
auto const& pquery = smp.pquery_at(i);
// Use the first name of the pquery if possible; use default name otherwise.
std::string name = "Unnamed Pquery";
double multiplicity = 1.0;
if( pquery.name_size() > 0 ) {
name = pquery.name_at( 0 ).name;
multiplicity = pquery.name_at( 0 ).multiplicity;
}
// Output all placement positions of this Pquery.
size_t count = 1;
for( auto const& placement : pquery.placements() ) {
// Output the name of the pquery.
outfile << name;
// Output a counter/rank (the nth most probably placement position of this Pquery).
outfile << "\t" << count;
// Calc distal length from branch length and proximal length.
auto distal_len = placement.edge().data<tree::DefaultEdgeData>().branch_length
- placement.proximal_length;
// Output the properties.
outfile << "\t" << multiplicity;
outfile << "\t" << placement.edge().index();
outfile << "\t" << placement.like_weight_ratio;
outfile << "\t" << placement.likelihood;
outfile << "\t" << placement.proximal_length;
outfile << "\t" << distal_len;
outfile << "\t" << placement.pendant_length;
outfile << "\t" << edpl_vec.at(i);
// Get both nodes at the end of the placement's edge.
// Both are then a pair containing the node as first element and the distance to it
// as the second element.
auto leaf_p = closest_leaf_nodes[ placement.edge().primary_node().index() ];
auto leaf_s = closest_leaf_nodes[ placement.edge().secondary_node().index() ];
// Check which node at the end of the edge is closer to a leaf.
// Use the name of the closer leaf and output its name and the distance to the placement.
if( leaf_p.second < leaf_p.second ) {
outfile << "\t" << leaf_p.first->data<tree::DefaultNodeData>().name;
outfile << "\t" << leaf_p.second;
} else {
outfile << "\t" << leaf_s.first->data<tree::DefaultNodeData>().name;
outfile << "\t" << leaf_s.second;
}
outfile << "\n";
// Exit condition. We printed n many placments for this pquery.
++count;
if( n_max_placements > 0 && count > n_max_placements ) {
break;
}
}
}
// Finish.
outfile.close();
LOG_INFO << "Finished.";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment