Last active
June 25, 2016 23:21
-
-
Save lczech/f42ed5c6f229a078449b308ad5497919 to your computer and use it in GitHub Desktop.
Extract lists of pquery names per branch of the reference tree, using genesis v0.7.0 (https://github.com/lczech/genesis).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Genesis - A toolkit for working with phylogenetic data. | |
Copyright (C) 2014-2016 Lucas Czech | |
This program is free software: you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation, either version 3 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program. If not, see <http://www.gnu.org/licenses/>. | |
Contact: | |
Lucas Czech <[email protected]> | |
Exelixis Lab, Heidelberg Institute for Theoretical Studies | |
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany | |
*/ | |
#include <assert.h> | |
#include <stdexcept> | |
#include <string> | |
#include <unordered_map> | |
#include <utility> | |
#include <vector> | |
#include "placement/function/functions.hpp" | |
#include "placement/formats/jplace_reader.hpp" | |
#include "placement/sample.hpp" | |
#include "utils/core/logging.hpp" | |
#include "utils/core/fs.hpp" | |
#include "utils/text/string.hpp" | |
using namespace genesis; | |
// ================================================================================================= | |
// Main Function | |
// ================================================================================================= | |
int main( int argc, char** argv ) | |
{ | |
using namespace ::genesis::placement; | |
// Activate logging. | |
utils::Logging::log_to_stdout(); | |
// Check if the command line contains the right number of arguments and store them. | |
if (argc != 3) { | |
throw std::runtime_error( | |
"Need to provide two command line arguments:\n" | |
" * An input jplace file path.\n" | |
" * An output directory path." | |
); | |
} | |
auto jplace_filename = std::string( argv[1] ); | |
auto output_dir = utils::trim_right( std::string( argv[2] ), "/") + "/"; | |
// Some user output. | |
LOG_INFO << "Using jplace file " << jplace_filename; | |
LOG_INFO << "Using outout directory " << output_dir; | |
// Read the Jplace file into a Sample object. | |
Sample sample; | |
JplaceReader().from_file( jplace_filename, sample ); | |
// Some more output. | |
LOG_INFO << "Found " << sample.pquery_size() << " pqueries in sample."; | |
// Remove all but the most likely placement position for each pquery. | |
placement::filter_n_max_weight_placements( sample, 1 ); | |
// Create a list of pquery names per edge of the tree, indexed by the edge index. | |
std::unordered_map< size_t, std::string > edge_pquery_names; | |
for( auto const& pqry : sample.pqueries() ) { | |
// We can be sure that there is at most one placement for the pquery, as we just filtered | |
// out all others. If there is none, this is an empty pquery, so issue a warning and skip it. | |
assert( pqry.placement_size() <= 1 ); | |
if( pqry.placement_size() == 0 ) { | |
LOG_WARN << "Pquery without any placements found. Skipping this."; | |
continue; | |
} | |
// Skip if there is no name (this pquery cannot be identified anyway, so there is no need | |
// to add a default name to the list). | |
if( pqry.name_size() == 0 ) { | |
LOG_WARN << "Pquery without name found. Skipping this."; | |
continue; | |
} | |
// Add the first name of the pquery to the list at the according edge index. | |
auto const& placement = pqry.placement_at( 0 ); | |
edge_pquery_names[ placement.edge().index() ] += pqry.name_at( 0 ).name + "\n"; | |
} | |
// Now write out all edge index lists to the output dir. | |
for( auto const& edge_output : edge_pquery_names ) { | |
auto filename = output_dir + "edge_" + utils::to_string( edge_output.first ) + ".txt"; | |
// Skip if file exists. | |
if( utils::file_exists( filename )) { | |
LOG_WARN << "Output file " << filename << " exists. Skipping."; | |
continue; | |
} | |
utils::file_write( edge_output.second, filename ); | |
} | |
LOG_INFO << "Finished."; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment