Last active
June 8, 2022 10:01
-
-
Save gavlooth/f37bb312c5d163b1d889cdb6fd7b4df5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%module gbwtgraph | |
%feature("flatnested", "1"); | |
%feature("notabstract") GBWTGraph; | |
%include <std_pair.i> | |
%include <std_vector.i> | |
%include <std_string.i> | |
%include <stdint.i> | |
%inline { | |
#include <typeinfo> | |
} | |
%inline{
typedef long long int nid_t;
}

/* Scheme integer -> nid_t. Through the %apply below the same conversion is
   used for the unsigned GBWT integer types, so the cast targets whatever the
   actual parameter type is instead of a fixed uint64_t. */
%typemap(in) nid_t {
  $1 = static_cast<$1_ltype>(SWIG_convert_long($input));
}

/* nid_t -> Scheme integer. The value is converted at full 64-bit width; the
   previous (int) cast silently truncated ids that do not fit in 32 bits. */
%typemap(out) nid_t {
  $result = scheme_make_integer_value_from_long_long(static_cast<mzlonglong>($1));
}

%apply nid_t {gbwt::node_type, node_type, uint64_t}

/* The types named in the %apply above are unsigned, so their results are
   converted with the unsigned constructor; this overrides the signed out
   typemap that %apply copied for them. */
%typemap(out) gbwt::node_type, node_type, uint64_t {
  $result = scheme_make_integer_value_from_unsigned_long_long(static_cast<umzlonglong>($1));
}
/* std::vector<handle_t> -> Scheme list of (orientation . node-id) pairs.
   Each handle is an integer whose lowest bit is the orientation flag and whose
   remaining bits are the node id; the flag is rendered as the byte string "-"
   when set and "+" otherwise.
   The list is built back to front by consing onto the tail, which keeps the
   element order of the vector without the variable-length stack array the
   previous version used (not standard C++, zero-sized for an empty vector and
   unbounded in stack use for a long one). */
%typemap(out) std::vector<handle_t>
{
  Scheme_Object* scheme_result = scheme_null;
  for (std::size_t remaining = $1.size(); remaining > 0; --remaining) {
    const uint64_t packed = handlegraph::as_integer($1.at(remaining - 1));
    Scheme_Object* scheme_integer = scheme_make_integer_value(packed >> 1);
    Scheme_Object* orientation = scheme_make_byte_string((packed & 1) ? "-" : "+");
    Scheme_Object* scheme_pair = scheme_make_pair(orientation, scheme_integer);
    scheme_result = scheme_make_pair(scheme_pair, scheme_result);
  }
  $result = scheme_result;
}
/* Scheme list of exact integers -> vector<long long int>.
   The previous version passed an uninitialised `long long int*` to
   scheme_get_long_long_val (a write through a wild pointer) and leaked a
   heap-allocated vector on every call. */
%typemap(in) vector<long long int> {
  const int lg = (int) scheme_list_length($input);
  mzlonglong converted = 0;

  /* First pass: validate every element before any C++ container is filled,
     because scheme_wrong_type does not return. */
  Scheme_Object* the_list = $input;
  for (int i = 0; i < lg; ++i) {
    if (!scheme_get_long_long_val(scheme_car(the_list), &converted)) {
      scheme_wrong_type(FUNC_NAME, "list of exact integers", $argnum - 1, argc, argv);
    }
    the_list = scheme_cdr(the_list);
  }

  /* Second pass: the conversions are now known to succeed. */
  std::vector<long long int> values;
  if (lg > 0) {
    values.reserve(lg);
  }
  the_list = $input;
  for (int i = 0; i < lg; ++i) {
    scheme_get_long_long_val(scheme_car(the_list), &converted);
    values.push_back(converted);
    the_list = scheme_cdr(the_list);
  }
  $1 = values;
}
/* Scheme list of reals -> vector<double>.
   Elements are converted with scheme_real_to_double in list order. The vector
   is now a local that is copied into the argument, instead of a `new`
   allocation that was never freed; the unused scratch double is gone. */
%typemap(in) vector<double> {
  Scheme_Object* the_list = $input;
  const int lg = (int) scheme_list_length($input);
  std::vector<double> values;
  if (lg > 0) {
    values.reserve(lg);
  }
  for (int i = 0; i < lg; ++i) {
    values.push_back(scheme_real_to_double(scheme_car(the_list)));
    the_list = scheme_cdr(the_list);
  }
  $1 = values;
}
// Unqualified aliases for the integer and range types used by the SearchState
// and GBWT declarations further down in this file.
%inline %{ | |
typedef uint64_t node_type ; | |
typedef uint64_t size_type ; | |
typedef std::pair<size_type , size_type> range_type ; | |
%} ; | |
%{ | |
#include<handlegraph/types.hpp> | |
#include <handlegraph/util.hpp> | |
#include <gbwtgraph/gbwtgraph.h> | |
#include <gbwtgraph/gfa.h> | |
#include <gbwt/gbwt.h> | |
#include <gbwt/utils.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <iostream> | |
#include <sstream> | |
#include <random> | |
using namespace std; | |
using namespace gbwtgraph; | |
using namespace gbwt; | |
typedef long long int nid_t; | |
// Reinterprets an untyped pointer as a SearchState pointer; no check is made
// that `input` really refers to a SearchState.
SearchState* convert_void (void *input )
{
  SearchState* typed = static_cast<SearchState*>(input);
  return typed;
}
// Returns a copy of the SearchState that `input` points to. `input` is
// dereferenced unconditionally, so it must not be null.
SearchState pointer_to_searchstate (void *input )
{
  return *static_cast<SearchState*>(input);
}
// Helper for exercising the vector<long long int> input typemap: returns the
// first element of `nodes`, or 0 when `nodes` is empty (indexing an empty
// vector, as the previous version did, is undefined behaviour).
long long int test_vector (vector<nid_t> nodes)
{
  if (nodes.empty()) {
    return 0;
  }
  return nodes.front();
}
// Identity helper for exercising the vector<void*> typemaps: hands the
// argument straight back to the caller.
std::vector<void*> vector_test (std::vector<void*> tmp)
{
  std::vector<void*>& echoed = tmp;
  return echoed;
}
// ref_handle: alias for a const handle reference, used as a parameter type in
//             the GRAPH declaration given to SWIG later in this file.
// vector_type: unqualified alias for gbwt::vector_type.
typedef const handle_t& ref_handle ; | |
typedef gbwt::vector_type vector_type ; | |
class GRAPH : public GBWTGraph{ | |
public: | |
GRAPH(const GBWT& gbwt_index, const SequenceSource& sequence_source):GBWTGraph::GBWTGraph( gbwt_index, sequence_source) {} | |
bool follow_edges (const handle_t& handle, bool go_left, const function<bool(const handle_t&)>& iteratee) | |
{ | |
return this->follow_edges_impl (handle, go_left, iteratee) ; | |
} | |
bool for_each_handle (const function<bool(const handle_t&)>& iteratee, bool parallel = false) | |
{ | |
return this->for_each_handle_impl(iteratee, parallel) ; | |
} | |
SearchState extend(SearchState state, nid_t node) const {return this->index->extend(state, node);}; | |
gbwt::SearchState extend(SearchState state, handle_t handle) const {return this->index->extend(state, handle_to_node(handle) );}; | |
void serialize_members(std::ostream& out) const{ | |
this->serialize_members (out) ; | |
} | |
// Underlying implementation to "deserialize" method. | |
// Load the sequences from the istream. | |
// Throws sdsl::simple_sds::InvalidData if sanity checks fail. | |
// User must call set_gbwt() before using the graph. | |
void deserialize_members(std::istream& in){ | |
this-> deserialize_members(in) ; | |
} | |
std::vector<handle_t> collect_succesive_edges (nid_t node){ | |
std::vector<handle_t>* edges = new (std::vector<handle_t>); | |
handle_t handle = get_handle(node) ; | |
std::function<bool(const handle_t&)> iteratee = | |
[&edges](handle_t handle) { | |
edges->push_back(handle) ; | |
return true; | |
}; | |
bool is_end = this->follow_edges(handle, false, iteratee); | |
return *edges; | |
} | |
std::vector<handle_t> collect_succesive_edges (handle_t handle){ | |
std::vector<handle_t>* edges = new (std::vector<handle_t>); | |
std::function<bool(const handle_t&)> iteratee = | |
[&edges](handle_t handle) { | |
edges->push_back(handle) ; | |
return true; | |
}; | |
bool is_end = this->follow_edges(handle, false, iteratee); | |
return *edges; | |
} | |
std::vector<gbwt::SearchState> collect_succesive_paths (handle_t handle){ | |
std::vector<gbwt::SearchState>* paths = new (std::vector<gbwt::SearchState>); | |
gbwt::SearchState initial_state = this->get_state(handle) ; | |
std::function<bool(const SearchState&)> iteratee = | |
[&paths](gbwt::SearchState state) { | |
paths->push_back(state) ; | |
return true; | |
}; | |
bool is_end = this->follow_paths(initial_state , iteratee); | |
return *paths; | |
} | |
std::vector<gbwt::SearchState> collect_succesive_paths (nid_t node){ | |
return this->collect_succesive_paths (this->node_to_handle(node)); | |
} | |
std::vector<gbwt::SearchState> collect_succesive_paths (handle_t handle , int number_of_paths ){ | |
std::vector<gbwt::SearchState>* paths = new (std::vector<gbwt::SearchState>); | |
if (number_of_paths < 1) | |
return *paths; | |
gbwt::SearchState initial_state = this->get_state(handle) ; | |
int counter = number_of_paths ; | |
std::function<bool(const SearchState&)> iteratee = | |
[&paths,&counter](gbwt::SearchState state) { | |
paths->push_back(state) ; | |
if (counter==0) | |
return false; | |
counter--; | |
return true; | |
}; | |
bool is_end = this->follow_paths(initial_state , iteratee); | |
return *paths; | |
} | |
std::vector<gbwt::SearchState> collect_succesive_paths (nid_t node, int number_of_paths ){ | |
return this->collect_succesive_paths (this->node_to_handle(node), number_of_paths); | |
} | |
vector<void*> extend_to_valid_states (SearchState state){ | |
long long int node_id = state.node; | |
std::vector<void*>* edge_states = new (vector<void*>); | |
handle_t handle = get_handle(node_id) ; | |
std::function<bool(const handle_t&)> iteratee = | |
[&edge_states,&state, *this](handle_t handle) { | |
SearchState *tmp = new(SearchState); | |
*tmp = state; | |
SearchState *tmp1 = new(SearchState); | |
*tmp1 = this->index->extend( *tmp , handle_to_node(handle)) ; | |
edge_states->push_back((void*)tmp1); | |
return true; | |
}; | |
bool is_end = this->follow_edges(handle, false, iteratee); | |
return *edge_states; | |
} | |
vector<void*> extend_to_valid_states (void* vstate){ | |
SearchState state = * (SearchState *)vstate ; | |
long long int node_id = state.node; | |
std::vector<void*>* edge_states = new (vector<void*>); | |
handle_t handle = get_handle(node_id) ; | |
std::function<bool(const handle_t&)> iteratee = | |
[&edge_states,&state, *this](handle_t handle) { | |
SearchState *tmp = new(SearchState); | |
*tmp = state; | |
SearchState *tmp1 = new(SearchState); | |
*tmp1 = this->index->extend( *tmp , handle_to_node(handle)) ; | |
edge_states->push_back((void*)tmp1); | |
return true; | |
}; | |
bool is_end = this->follow_edges(handle, false, iteratee); | |
return *edge_states; | |
} | |
}; | |
// Parses the GFA file at `file` and returns a GRAPH built from the resulting
// GBWT index and sequence source.
//
// The index is released from its unique_ptr on purpose and never freed: the
// graph reaches it through a pointer (see the `this->index->` uses in GRAPH),
// so it has to outlive the returned object.
// NOTE(review): the sequence source is now destroyed when this function
// returns, on the assumption that the GBWTGraph constructor copies what it
// needs from it - confirm against the gbwtgraph documentation.
// The previous version additionally leaked a heap-allocated GRAPH after
// copying it into the return value.
GRAPH gfa_to_gbwtgraph (char* file)
{
  std::pair<std::unique_ptr<gbwt::GBWT>, std::unique_ptr<gbwtgraph::SequenceSource>> parsed =
    gbwtgraph::gfa_to_gbwt(std::string(file));
  const GBWT* long_lived_index = parsed.first.release();
  return GRAPH(*long_lived_index, *parsed.second);
}
// Parses the GFA file at `file` and returns only the GBWT index.
// The parsed index stays owned by its unique_ptr; its contents are swapped
// into the returned object, so nothing is leaked (the previous version
// released the pointer, copied the index and never freed the original).
GBWT gfa_to_gbwt (char* file)
{
  std::pair<std::unique_ptr<gbwt::GBWT>, std::unique_ptr<gbwtgraph::SequenceSource>> parsed =
    gbwtgraph::gfa_to_gbwt(std::string(file));
  GBWT index;
  index.swap(*parsed.first);
  return index;
}
// Draws `number_of_draws` elements from `nodes` with replacement, where
// nodes[i] is chosen with probability proportional to weights[i].
//
// Returns an empty vector when `number_of_draws` is not positive or `nodes`
// is empty. Weights beyond nodes.size() have no node to select and are
// ignored, so an index past the end of `nodes` can no longer be produced.
// The generator is seeded from std::random_device on every call.
std::vector<void*> sample_distribution (std::vector<double> weights, std::vector<void*> nodes, int number_of_draws)
{
  std::vector<void*> sample_container;
  if (number_of_draws <= 0 || nodes.empty()) {
    return sample_container;
  }
  if (weights.size() > nodes.size()) {
    weights.resize(nodes.size());
  }

  std::random_device rd;
  std::mt19937 gen(rd());
  std::discrete_distribution<> d(weights.begin(), weights.end());

  sample_container.reserve(static_cast<std::size_t>(number_of_draws));
  for (int n = 0; n < number_of_draws; ++n) {
    sample_container.push_back(nodes[d(gen)]);
  }
  return sample_container;
}
%}; | |
/* typedef std::pair<size_type , size_type> range_type ; */

/* range_type -> Scheme pair (first . second), both members converted with
   scheme_make_integer_value. */
%typemap (out) range_type // std::pair<nid_t, nid_t>
{
  Scheme_Object* lower_bound = scheme_make_integer_value($1.first);
  Scheme_Object* upper_bound = scheme_make_integer_value($1.second);
  $result = scheme_make_pair(lower_bound, upper_bound);
}

%apply range_type {gbwt::range_type, std::pair<size_type, size_type> }
// Alias for `const handle_t&`. The input typemap further down is keyed on this
// name, and the GRAPH declaration uses it for its handle parameters.
%inline { | |
typedef const handle_t& ref_handle ; | |
} | |
/* handle_t -> Scheme pair (orientation . node-id).
   The handle's integer value carries the orientation flag in its lowest bit
   and the node id in the bits above it. The flag becomes the byte string "-"
   when set and "+" when clear. */
%typemap(out) handle_t
{
  const bool flagged = handlegraph::as_integer($1) & 1;
  const nid_t node_id = handlegraph::as_integer($1) >> 1;
  Scheme_Object* id_object = scheme_make_integer_value(node_id);
  Scheme_Object* sign_object = scheme_make_byte_string(flagged ? "-" : "+");
  $result = scheme_make_pair(sign_object, id_object);
}
%inline{
/* Builds a handle from a number and a flag: the number occupies the upper 63
   bits and the flag the lowest bit. Asserts that the number fits in 63 bits. */
inline static handle_t pack(const uint64_t& number, const bool& bit) {
  assert(number < (0x1ULL << 63));
  const uint64_t shifted = number << 1;
  if (bit) {
    return handlegraph::as_handle(shifted | 1);
  }
  return handlegraph::as_handle(shifted | 0);
}
}
/* Scheme pair (orientation . node-id) -> const handle_t&.
   The orientation is a string; exactly "-" selects the flagged orientation,
   anything else (including a non-string) the unflagged one. The node id is
   converted with SWIG_convert_long and packed with pack().

   Changes from the previous version:
    - the handle lives in a typemap-level local (`temp`, declared in the
      parentheses after the type) so that the pointer stored in $1 stays valid
      for the wrapped call; before, $1 pointed at a variable declared inside
      the typemap's own block;
    - input that is not a pair raises a type error instead of packing an
      uninitialised node id;
    - byte strings are accepted as well, since the handle output typemaps in
      this file emit the orientation with scheme_make_byte_string.
      NOTE(review): depending on how SCHEME_STRINGP is defined for the
      MzScheme/Racket version in use this extra branch may be redundant. */
%typemap(in) ref_handle (handle_t temp)
{
  if (!SCHEME_PAIRP($input)) {
    scheme_wrong_type(FUNC_NAME, "pair of orientation string and node id", $argnum - 1, argc, argv);
  }

  Scheme_Object* orientation_object = SCHEME_CAR($input);
  const char* orientation_text = NULL;
  if (SCHEME_BYTE_STRINGP(orientation_object)) {
    orientation_text = SCHEME_BYTE_STR_VAL(orientation_object);
  } else if (SCHEME_STRINGP(orientation_object)) {
    orientation_text = SCHEME_STR_VAL(orientation_object);
  }

  const bool is_left = orientation_text != NULL
                       && orientation_text[0] == '-'
                       && orientation_text[1] == '\0';
  const uint64_t node_id = (uint64_t) SWIG_convert_long(SCHEME_CDR($input));

  temp = pack(node_id, is_left);
  $1 = &temp;
}
/* (name, (first, second)) -> Scheme pair (name . (first . second)).
   The name is emitted as a byte string, the two ids with
   scheme_make_integer_value. */
%typemap (out) std::pair<std::string, std::pair<nid_t, nid_t>>
{
  Scheme_Object* name_object = scheme_make_byte_string($1.first.c_str());
  Scheme_Object* first_id = scheme_make_integer_value($1.second.first);
  Scheme_Object* second_id = scheme_make_integer_value($1.second.second);
  Scheme_Object* id_range = scheme_make_pair(first_id, second_id);
  $result = scheme_make_pair(name_object, id_range);
}
/* Scheme list -> vector<void*>.
   Every list element is stored as-is: the Scheme object pointer itself is
   cast to void*, no unwrapping takes place. The vector is now a local that is
   copied into the argument instead of a `new` allocation that was never
   freed; the unused scratch pointer is gone. */
%typemap(in) vector<void*> {
  Scheme_Object* the_list = $input;
  const int lg = (int) scheme_list_length($input);
  std::vector<void*> elements;
  if (lg > 0) {
    elements.reserve(lg);
  }
  for (int i = 0; i < lg; ++i) {
    elements.push_back((void*) scheme_car(the_list));
    the_list = scheme_cdr(the_list);
  }
  $1 = elements;
}
/* vector<void*> -> Scheme list.
   An element whose Scheme type tag is one of 47, 55, 62 or 59 is placed in the
   list unchanged; every other element is wrapped as a SWIG void* pointer
   object.
   NOTE(review): the four numbers are raw Scheme_Type values, presumably the
   tags of Scheme values that the vector<void*> input typemap stored unchanged;
   the numbering depends on the MzScheme/Racket version, so confirm them
   against its headers. SCHEME_TYPE is also applied to pointers that may not be
   Scheme objects at all.

   Changes from the previous version:
    - the per-element debug output on std::cout is removed;
    - the list is consed back to front (same element order) instead of going
      through a variable-length stack array;
    - the void* -> Scheme_Object* conversion is an explicit cast, as C++
      requires. */
%typemap(out) vector<void*>{
  Scheme_Object* scheme_result = scheme_null;
  for (std::size_t remaining = $1.size(); remaining > 0; --remaining) {
    void* scheme_list_value = $1.at(remaining - 1);
    const int the_type = (int) SCHEME_TYPE(scheme_list_value);
    Scheme_Object* element;
    if ((the_type == 47) || (the_type == 55) || (the_type == 62) || (the_type == 59)) {
      element = static_cast<Scheme_Object*>(scheme_list_value);
    } else {
      element = SWIG_NewPointerObj(scheme_list_value, $descriptor(void*), $owner);
    }
    scheme_result = scheme_make_pair(element, scheme_result);
  }
  $result = scheme_result;
}
/* vector<SearchState*> -> Scheme list of SWIG pointer objects.
   Changes from the previous version:
    - the pointer objects are created with the descriptor of the pointer type
      (SearchState *), which is what a wrapped pointer is matched against; the
      descriptor of the value type was used before;
    - the list is consed back to front (same element order) instead of going
      through a variable-length stack array. */
%typemap(out) vector<SearchState*>{
  Scheme_Object* scheme_result = scheme_null;
  for (std::size_t remaining = $1.size(); remaining > 0; --remaining) {
    Scheme_Object* wrapped = SWIG_NewPointerObj($1.at(remaining - 1), $descriptor(SearchState *), $owner);
    scheme_result = scheme_make_pair(wrapped, scheme_result);
  }
  $result = scheme_result;
}
/* vector<float> weights */ | |
%rename(equals) operator==; | |
%rename(unequals) operator!=; | |
%include "/usr/local/include/handlegraph/types.hpp"; | |
// Declaration of GRAPH as presented to SWIG: the members listed here are the
// ones offered for wrapping. Handle parameters are spelled `ref_handle` so
// that the ref_handle input typemap defined earlier in this file is keyed on
// them. The C++ definition of the class lives in the %{ ... %} section above.
class GRAPH { | |
public: | |
GRAPH(const GBWT& gbwt_index, const SequenceSource& sequence_source) ; | |
~GRAPH(); | |
/* std::vector<handle_t> get_all_handle_edges (void) ; */ | |
/* */ | |
/* std::vector<handle_t> get_all_handles (void) ; */ | |
// Forwarders to follow_edges_impl / for_each_handle_impl in the C++ definition above.
bool follow_edges (ref_handle handle, bool go_left, const function<bool(const handle_t&)>& iteratee); | |
bool for_each_handle (const function<bool(const handle_t&)>& iteratee, bool parallel = false) ; | |
/* handle_t get_handle(char *value); */ | |
bool has_node(nid_t node_id) const; | |
// Look up the handle for the node with the given ID in the given orientation. | |
handle_t get_handle(uint64_t node_id, bool is_reverse = false) const; | |
// Get the ID from a handle. | |
nid_t get_id(ref_handle handle) const; | |
// Get the orientation of a handle. | |
bool get_is_reverse(ref_handle handle) const; | |
// Invert the orientation of a handle (potentially without getting its ID). | |
handle_t flip(ref_handle handle) const; | |
// Get the length of a node. | |
size_t get_length(ref_handle handle) const; | |
// Get the sequence of a node, presented in the handle's local forward | |
// orientation. | |
std::string get_sequence(ref_handle handle) const; | |
// Returns one base of a handle's sequence, in the orientation of the | |
// handle. | |
char get_base(ref_handle handle, size_t index) const; | |
// Returns a substring of a handle's sequence, in the orientation of the | |
// handle. If the indicated substring would extend beyond the end of the | |
// handle's sequence, the return value is truncated to the sequence's end. | |
std::string get_subsequence(ref_handle handle, size_t index, size_t size) const; | |
// Return the number of nodes in the graph. | |
size_t get_node_count() const; | |
nid_t min_node_id() const; | |
nid_t max_node_id() const; | |
size_t get_degree(ref_handle handle, bool go_left) const; | |
// Returns true if there is an edge that allows traversal from the left | |
// handle to the right handle. | |
// NOTE(review): `right` is spelled `const handle_t&`, not `ref_handle`, unlike
// every other handle parameter in this declaration - confirm this is intended.
bool has_edge(ref_handle left, const handle_t& right) const; | |
//------------------------------------------------------------------------------ | |
/* | |
SerializableHandleGraph interface. | |
*/ | |
void set_gbwt(const gbwt::GBWT& gbwt_index); | |
/* set_gbwt(const gbwt::GBWT& gbwt_index) */ | |
/* { */ | |
/* this->index = &gbwt_index; */ | |
/* */ | |
/* if(!(this->index->bidirectional())) */ | |
/* { */ | |
/* throw InvalidGBWT("GBWTGraph: The GBWT index must be bidirectional"); */ | |
/* } */ | |
/* } */ | |
uint32_t get_magic_number() const; | |
void serialize_members(std::ostream& out) const; | |
void deserialize_members(std::istream& in); | |
//------------------------------------------------------------------------------ | |
// Returns `true` if the graph contains a translation from node ids to segment names. | |
bool has_segment_names() const; | |
// Returns (GFA segment name, semiopen node id range) containing the handle. | |
// If there is no such translation, returns ("id", (id, id + 1)). | |
std::pair<std::string, std::pair<nid_t, nid_t>> get_segment(ref_handle handle) const; | |
// Returns (GFA segment name, starting offset in the same orientation) for the handle. | |
// If there is no translation, returns ("id", 0). | |
std::pair<std::string, size_t> get_segment_name_and_offset(ref_handle handle) const; | |
// Returns the name of the original GFA segment corresponding to the handle. | |
// If there is no translation, returns the node id as a string. | |
std::string get_segment_name(ref_handle handle) const; | |
// Returns the starting offset in the original GFA segment corresponding to the handle | |
// in the same orientation as the handle. | |
// If there is no translation, returns 0. | |
size_t get_segment_offset(ref_handle handle) const; | |
// Calls `iteratee` with each segment name and the semiopen interval of node ids | |
// corresponding to it. Stops early if the call returns `false`. | |
// In GBWTGraph, the segments are visited in sorted order by node ids. | |
void for_each_segment(const std::function<bool(const std::string&, std::pair<nid_t, nid_t>)>& iteratee) const; | |
// Calls `iteratee` with each inter-segment edge and the corresponding segment names | |
// in the canonical orientation. Stops early if the call returns `false`. | |
void for_each_link(const std::function<bool(const edge_t&, const std::string&, const std::string&)>& iteratee) const; | |
//------------------------------------------------------------------------------ | |
/* | |
GBWTGraph specific interface. | |
*/ | |
// Serialize the graph into the output stream in the simple-sds format. | |
void simple_sds_serialize(std::ostream& out) const; | |
// Deserialize or decompress the graph from the input stream and set the given | |
// GBWT index. Note that the GBWT index is essential for loading the structure. | |
// Throws sdsl::simple_sds::InvalidData if sanity checks fail and `InvalidGBWT` | |
// if the GBWT index is not bidirectional. | |
void simple_sds_load(std::istream& in, const gbwt::GBWT& gbwt_index); | |
// Returns the size of the serialized structure in elements. | |
size_t simple_sds_size() const; | |
// Convert gbwt::node_type to handle_t. | |
static handle_t node_to_handle(gbwt::node_type node) { return handlegraph::as_handle(node); } | |
// Convert handle_t to gbwt::node_type. | |
static node_type handle_to_node(ref_handle handle) { return handlegraph::as_integer(handle); } | |
// Get node sequence as a pointer and length. | |
view_type get_sequence_view(ref_handle handle) const; | |
// Determine if the node sequence starts with the given character. | |
bool starts_with(ref_handle handle, char c) const; | |
// Determine if the node sequence ends with the given character. | |
bool ends_with(ref_handle handle, char c) const; | |
// Convert handle_t to gbwt::SearchState. | |
// Note that the state may be empty if the handle does not correspond to a real node. | |
SearchState get_state(ref_handle handle) const { return this->index->find(handle_to_node(handle)); } | |
// Convert handle_t to gbwt::BidirectionalState. | |
// Note that the state may be empty if the handle does not correspond to a real node. | |
gbwt::BidirectionalState get_bd_state(ref_handle handle) const { return this->index->bdFind(handle_to_node(handle)); } | |
// Extend `state` by `node` in the index; only the nid_t overload of the C++
// definition above is declared here, the handle_t one is commented out below.
SearchState extend(SearchState state, nid_t node) const ; | |
/* gbwt::SearchState extend(SearchState state, handle_t handle) const; */ | |
// Get the search state corresponding to the vector of handles. | |
gbwt::SearchState find(const std::vector<handle_t>& path) const; | |
// Get the bidirectional search state corresponding to the vector of handles. | |
gbwt::BidirectionalState bd_find(const std::vector<handle_t>& path) const; | |
// Visit all successor states of this state and call iteratee for the state. | |
// Stop and return false if the iteratee returns false. | |
// Note that this does not visit empty successor states. | |
bool follow_paths(gbwt::SearchState state, const std::function<bool(const gbwt::SearchState&)>& iteratee) const | |
{ | |
return this->follow_paths(this->get_single_cache(), state, iteratee); | |
} | |
// Visit all predecessor/successor states of this state and call iteratee for the state. | |
// Stop and return false if the iteratee returns false. | |
// Note that this does not visit empty predecessor/successor states. | |
// Each state corresponds to a path. Going backward extends the path left, while going | |
// forward extends it right. When going backward, the state is for the reverse orientation. | |
bool follow_paths(gbwt::BidirectionalState state, bool backward, | |
const std::function<bool(const gbwt::BidirectionalState&)>& iteratee) const | |
{ | |
return this->follow_paths(this->get_single_cache(), state, backward, iteratee); | |
} | |
//------------------------------------------------------------------------------ | |
/* | |
Cached GBWTGraph specific interface. Each thread must use a separate cache. | |
*/ | |
// Return a cache for the GBWT index. Note: The cache is not thread-safe. | |
gbwt::CachedGBWT get_cache() const { return gbwt::CachedGBWT(*(this->index), false); } | |
// Return a single-node cache for the GBWT index. Mostly for internal use. | |
// Note: The cache is not thread-safe. | |
gbwt::CachedGBWT get_single_cache() const { return gbwt::CachedGBWT(*(this->index), true); } | |
// Convert handle_t to gbwt::SearchState. | |
/* gbwt::SearchState get_state(const gbwt::CachedGBWT& cache, ref_handle handle) const */ | |
/* { */ | |
/* return cache.find(handle_to_node(handle)); */ | |
/* } */ | |
// Convert handle_t to gbwt::BidirectionalState. | |
gbwt::BidirectionalState get_bd_state(const gbwt::CachedGBWT& cache, ref_handle handle) const | |
{ | |
return cache.bdFind(handle_to_node(handle)); | |
} | |
// Get the search state corresponding to the vector of handles. | |
gbwt::SearchState find(const gbwt::CachedGBWT& cache, const std::vector<handle_t>& path) const; | |
// Get the bidirectional search state corresponding to the vector of handles. | |
gbwt::BidirectionalState bd_find(const gbwt::CachedGBWT& cache, const std::vector<handle_t>& path) const; | |
// Visit all successor states of this state and call iteratee for the state. | |
// Stop and return false if the iteratee returns false. | |
// Note that the state may be empty if no path continues to that node. | |
bool follow_paths(const gbwt::CachedGBWT& cache, gbwt::SearchState state, | |
const std::function<bool(const gbwt::SearchState&)>& iteratee) const; | |
// Visit all predecessor/successor states of this state and call iteratee for the state. | |
// Stop and return false if the iteratee returns false. | |
// Note that the state may be empty if no path continues to that node. | |
// Each state corresponds to a path. Going backward extends the path left, while going | |
// forward extends it right. When going backward, the state is for the reverse orientation. | |
bool follow_paths(const gbwt::CachedGBWT& cache, gbwt::BidirectionalState state, bool backward, | |
const std::function<bool(const gbwt::BidirectionalState&)>& iteratee) const; | |
// Loop over all the handles to next/previous (right/left) nodes. Passes | |
// them to a callback which returns false to stop iterating and true to | |
// continue. Returns true if we finished and false if we stopped early. | |
bool cached_follow_edges(const gbwt::CachedGBWT& cache, ref_handle handle, bool go_left, | |
const std::function<bool(const handle_t&)>& iteratee) const; | |
//------------------------------------------------------------------------------ | |
// Vector-returning helpers defined on GRAPH in the %{ ... %} section above.
// Of the two extend_to_valid_states overloads defined there, only the void*
// one is declared here.
std::vector<handle_t> collect_succesive_edges (nid_t node); | |
std::vector<handle_t> collect_succesive_edges (handle_t handle) ; | |
std::vector<gbwt::SearchState> collect_succesive_paths (handle_t handle); | |
std::vector<gbwt::SearchState> collect_succesive_paths (nid_t node) ; | |
std::vector<gbwt::SearchState> collect_succesive_paths (handle_t handle , int number_of_paths ); | |
std::vector<gbwt::SearchState> collect_succesive_paths (nid_t node , int number_of_paths ); | |
vector<void*> extend_to_valid_states (void* vstate); | |
/* vector<SearchState*> extend_to_valid_states (void* vstate); */ | |
}; | |
// Declarations of free functions defined in the %{ ... %} section above.
// gfa_to_gbwtgraph / gfa_to_gbwt: build a GRAPH or a GBWT from the GFA file at `file`.
GRAPH gfa_to_gbwtgraph (char* file); | |
GBWT gfa_to_gbwt (char* file) ; | |
// convert_void / pointer_to_searchstate: view an untyped pointer as a
// SearchState pointer, or return a copy of the SearchState it points to.
SearchState* convert_void (void *input ) ; | |
SearchState pointer_to_searchstate (void *input ); | |
//------------------------------------------------------------------------------ | |
/* | |
Traverse all haplotype-consistent windows in the graph and call lambda() for each window. | |
Uses multiple threads, so the lambda should be thread-safe. | |
A window starts with the sequence of a node and is followed by window_size - 1 bases | |
from subsequent nodes. If no extensions are possible, a shorter substring of | |
length >= window_size also qualifies as a window. | |
*/ | |
// Declaration only: no definition of this function appears in this file, and
// the graph parameter is a GBWTGraph rather than the GRAPH subclass.
void for_each_haplotype_window(const GBWTGraph& graph, size_t window_size, | |
const std::function<void(const std::vector<handle_t>&, const std::string&)>& lambda, | |
bool parallel); | |
//------------------------------------------------------------------------------ | |
%template(LongPair) std::pair<std::uint64_t, std::uint64_t>; | |
// Declaration of SearchState as presented to SWIG: a node together with an
// offset range, using the unqualified node_type / size_type / range_type
// aliases declared earlier in this file.
struct SearchState | |
{ | |
node_type node; | |
range_type range; | |
// Default state: node ENDMARKER with Range::empty_range().
SearchState() : node(ENDMARKER), range(Range::empty_range()) {} | |
// Length of the range, as computed by Range::length.
size_type size() const { return Range::length(this->range); } | |
SearchState(node_type node_id, range_type offset_range) : node(node_id), range(offset_range) {} | |
SearchState(node_type node_id, size_type sp, size_type ep) : node(node_id), range(sp, ep) {} | |
bool empty() const { return Range::empty(this->range); } | |
// Exposed under the names `equals` / `unequals` by the %rename directives earlier in this file.
bool operator==(SearchState another) const { return (this->node == another.node && this->range == another.range); } | |
bool operator!=(SearchState another) const { return (this->node != another.node || this->range != another.range); } | |
}; | |
%inline {
/* Returns the fixed range (3, 4); a helper for exercising the range_type
   output typemap. */
range_type test_range_type (void) {
  return range_type(3, 4);
}

/* Returns a copy of the offset range stored in *state. `state` is
   dereferenced without a null check. */
range_type SearchState_get_range (SearchState* state) {
  return range_type(state->range.first, state->range.second);
}
}
// Declaration of sample_distribution, which is defined in the %{ ... %} section above.
vector<void*> sample_distribution (vector<double> weights, vector<void*> nodes, int number_of_draws); | |
// NOTE(review): SearchState_get_range is already defined in the %inline block
// directly above, so this second declaration looks redundant - confirm and drop.
range_type SearchState_get_range (SearchState* state); | |
// Declaration of GBWT as presented to SWIG: the members listed here are the
// ones offered for wrapping. Several inline bodies refer to members (header,
// bwt, da_samples, metadata, record(), endmarker()) that are not declared in
// this file. Templated overloads and some vector-returning overloads are
// commented out and marked as problematic by the original author.
class GBWT { | |
public: | |
GBWT(); | |
GBWT(const GBWT& source); | |
/* explicit GBWT(const std::vector<GBWT>& sources); */ | |
void swap(GBWT& another); | |
void resample(size_type sample_interval); | |
size_type serialize(std::ostream& out, sdsl::structure_tree_node* v = nullptr, std::string name = "") const; | |
void load(std::istream& in); | |
void simple_sds_serialize(std::ostream& out) const; | |
void simple_sds_load(std::istream& in); | |
size_t simple_sds_size() const; | |
// Statistics read from the header.
size_type size() const { return this->header.size; } | |
bool empty() const { return (this->size() == 0); } | |
size_type sequences() const { return this->header.sequences; } | |
size_type sigma() const { return this->header.alphabet_size; } | |
size_type effective() const { return this->header.alphabet_size - this->header.offset; } | |
std::pair<size_type, size_type> runs() const; | |
size_type samples() const { return this->da_samples.size(); } | |
bool bidirectional() const { return this->header.get(GBWTHeader::FLAG_BIDIRECTIONAL); } | |
//------------------------------------------------------------------------------ | |
/* | |
Metadata interface. | |
*/ | |
bool hasMetadata() const { return this->header.get(GBWTHeader::FLAG_METADATA); } | |
void addMetadata() { this->header.set(GBWTHeader::FLAG_METADATA); } | |
void clearMetadata() { this->metadata.clear(); this->header.unset(GBWTHeader::FLAG_METADATA); }; | |
//------------------------------------------------------------------------------ | |
// Search interface: each call forwards to the free function of the same name in namespace gbwt.
SearchState find(node_type node) const { return gbwt::find(*this, node); } | |
//all template classes are problematic | |
/* template<class Iterator> */ | |
/* SearchState find(Iterator begin, Iterator end) const { return gbwt::find(*this, begin, end); } */ | |
SearchState prefix(node_type node) const { return gbwt::prefix(*this, node); } | |
/* template<class Iterator> */ | |
/* SearchState prefix(Iterator begin, Iterator end) const { return gbwt::prefix(*this, begin, end); } */ | |
SearchState extend(SearchState state, node_type node) const { return gbwt::extend(*this, state, node); } | |
/* template<class Iterator> */ | |
/* SearchState extend(SearchState state, Iterator begin, Iterator end) const { return gbwt::extend(*this, state, begin, end); } */ | |
/* */ | |
size_type locate(node_type node, size_type i) const { return gbwt::locate(*this, edge_type(node, i)); } | |
size_type locate(edge_type position) const { return gbwt::locate(*this, position); } | |
//problematic | |
/* std::vector<size_type> locate(node_type node, range_type range) const { return this->locate(SearchState(node, range)); } */ | |
/* std::vector<size_type> locate(SearchState state) const; */ | |
/* */ | |
/* vector_type extract(size_type sequence) const { return gbwt::extract(*this, sequence); } */ | |
vector_type extract(edge_type position) const { return gbwt::extract(*this, position); } | |
/* vector_type extract(edge_type position, size_type max_length) const { return gbwt::extract(*this, position, max_length); } */ | |
//endpr | |
//------------------------------------------------------------------------------ | |
/* | |
Bidirectional search interface. The queries check that the parameters are valid. | |
On error or failed search, the return value is an empty bidirectional search state. | |
*/ | |
BidirectionalState bdFind(node_type node) const { return gbwt::bdFind(*this, node); } | |
BidirectionalState bdExtendForward(BidirectionalState state, node_type node) const { return gbwt::bdExtendForward(*this, state, node); } | |
BidirectionalState bdExtendBackward(BidirectionalState state, node_type node) const { return gbwt::bdExtendBackward(*this, state, node); } | |
//------------------------------------------------------------------------------ | |
/* | |
Low-level interface: Nodes. The interface assumes that node identifiers are valid, | |
except in contains() / hasEdge(). This can be checked with contains(). | |
*/ | |
// True for ENDMARKER and for nodes strictly between header.offset and sigma().
bool contains(node_type node) const | |
{ | |
return ((node < this->sigma() && node > this->header.offset) || node == ENDMARKER); | |
} | |
bool contains(edge_type position) const | |
{ | |
return (this->contains(position.first) && position.second < this->nodeSize(position.first)); | |
} | |
bool contains(SearchState state) const | |
{ | |
return (this->contains(state.node) && !(state.empty()) && state.range.second < this->nodeSize(state.node)); | |
} | |
bool hasEdge(node_type from, node_type to) const | |
{ | |
return (this->contains(from) && this->record(from).hasEdge(to)); | |
} | |
std::vector<edge_type> edges(node_type from) const | |
{ | |
return this->record(from).outgoing; | |
} | |
node_type firstNode() const { return this->header.offset + 1; } | |
// toComp / toNode subtract or add header.offset; the value 0 is mapped to itself.
comp_type toComp(node_type node) const { return (node == 0 ? node : node - this->header.offset); } | |
node_type toNode(comp_type comp) const { return (comp == 0 ? comp : comp + this->header.offset); } | |
size_type nodeSize(node_type node) const { return this->bwt.size(this->toComp(node)); } | |
bool empty(node_type node) const { return this->bwt.empty(this->toComp(node)); } | |
//------------------------------------------------------------------------------ | |
/* | |
Low-level interface: Navigation and searching. The interface assumes that node | |
identifiers are valid. This can be checked with contains(). | |
*/ | |
// On error: invalid_edge(). | |
edge_type LF(node_type from, size_type i) const | |
{ | |
if(from == ENDMARKER) { return this->endmarker().LF(i); } | |
return this->record(from).LF(i); | |
} | |
// On error: invalid_edge(). | |
edge_type LF(edge_type position) const | |
{ | |
if(position.first == ENDMARKER) { return this->endmarker().LF(position.second); } | |
return this->record(position.first).LF(position.second); | |
} | |
// On error: invalid_offset(). | |
size_type LF(node_type from, size_type i, node_type to) const | |
{ | |
return this->record(from).LF(i, to); | |
} | |
// On error: invalid_offset(). | |
size_type LF(edge_type position, node_type to) const | |
{ | |
return this->record(position.first).LF(position.second, to); | |
} | |
// On error: Range::empty_range(). | |
range_type LF(node_type from, range_type range, node_type to) const | |
{ | |
return this->record(from).LF(range, to); | |
} | |
// On error: Range::empty_range(). | |
range_type LF(SearchState state, node_type to) const | |
{ | |
return this->record(state.node).LF(state.range, to); | |
} | |
// On error: Range::empty_range(). | |
range_type bdLF(SearchState state, node_type to, size_type& reverse_offset) const | |
{ | |
return this->record(state.node).bdLF(state.range, to, reverse_offset); | |
} | |
//------------------------------------------------------------------------------ | |
/* | |
Low-level interface: Sequences. The interface assumes that node identifiers are | |
valid. This can be checked with contains(). | |
*/ | |
// Starting position of the sequence or invalid_edge() if something fails. | |
edge_type start(size_type sequence) const { return this->LF(ENDMARKER, sequence); } | |
// Returns the sampled document identifier or invalid_sequence() if there is no sample. | |
size_type tryLocate(node_type node, size_type i) const | |
{ | |
return this->da_samples.tryLocate(this->toComp(node), i); | |
} | |
// Returns the sampled document identifier or invalid_sequence() if there is no sample. | |
size_type tryLocate(edge_type position) const | |
{ | |
return this->da_samples.tryLocate(this->toComp(position.first), position.second); | |
} | |
} ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
swig configuration for gbwtgraph and scheme