Last active
August 29, 2015 14:08
-
-
Save moonwatcher/2f4c5e0f66da3edbaaba to your computer and use it in GitHub Desktop.
C++ fastq demultiplexer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
FASTQ Demultiplexer | |
Author: Lior Galanti [email protected] | |
Build: | |
On ubuntu: | |
dependencies: sudo apt-get install lib32z1-dev libboost-iostreams-dev libboost-program-options-dev | |
compile: g++ -static phenaqs.cpp -lboost_iostreams -lboost_program_options -lz -std=c++11 -o phenaqs | |
On OS X: | |
dependencies: brew install boost --c++11 --with-icu | |
compile: g++ phenaqs.cpp -lboost_iostreams -lboost_program_options -lz -std=c++11 -o phenaqs | |
*/ | |
#include <stdio.h> | |
#include <string.h> | |
#include <string> | |
#include <unordered_map> | |
#include <vector> | |
#include <exception> | |
#include <fstream> | |
#include <iostream> | |
#include <boost/property_tree/exceptions.hpp> | |
#include <boost/iostreams/filtering_streambuf.hpp> | |
#include <boost/iostreams/filter/gzip.hpp> | |
#include <boost/property_tree/ptree.hpp> | |
#include <boost/property_tree/json_parser.hpp> | |
#include <boost/foreach.hpp> | |
#include <boost/lexical_cast.hpp> | |
#include <boost/algorithm/string.hpp> | |
#include <boost/program_options.hpp> | |
#define MAX_READ_SIZE 2048 | |
#define MAX_READ_ID_SIZE 2048 | |
#define READ_BUFFER_SIZE 2048 | |
#define WRITE_BUFFER_SIZE 2048 | |
using namespace std; | |
class FastqInputFeed { | |
private: | |
string path; | |
ifstream file; | |
istream* stream; | |
boost::iostreams::filtering_streambuf<boost::iostreams::input> buffer; | |
public: | |
int key; | |
FastqInputFeed(){} | |
~FastqInputFeed(){} | |
FastqInputFeed( const FastqInputFeed &obj){}; | |
bool done(){ | |
return stream->eof(); | |
} | |
istream* getStream() { | |
return stream; | |
} | |
void open(string path) { | |
this->path = path; | |
file.open(path, ios_base::in | ios_base::binary); | |
buffer.push(boost::iostreams::gzip_decompressor()); | |
buffer.push(file); | |
stream = new istream(&buffer); | |
} | |
void close() { | |
buffer.pop(); | |
delete stream; | |
file.close(); | |
} | |
}; | |
class FastqOutputFeed { | |
private: | |
string path; | |
ofstream file; | |
ostream* stream; | |
boost::iostreams::filtering_streambuf<boost::iostreams::output> buffer; | |
public: | |
int key; | |
FastqOutputFeed(){} | |
~FastqOutputFeed(){} | |
FastqOutputFeed( const FastqOutputFeed &obj){}; | |
ostream* getStream() { | |
return stream; | |
} | |
void open(string path) { | |
this->path = path; | |
file.open(path, ios_base::out | ios_base::binary); | |
buffer.push(boost::iostreams::gzip_compressor()); | |
buffer.push(file); | |
stream = new ostream(&buffer); | |
} | |
void close() { | |
buffer.pop(); | |
delete stream; | |
file.close(); | |
} | |
}; | |
class Sequence { | |
private: | |
int length; | |
public: | |
char* code; | |
char* quality; | |
Sequence() { | |
code = new char[MAX_READ_SIZE](); | |
quality = new char[MAX_READ_SIZE](); | |
clear(); | |
} | |
~Sequence() { | |
delete code; | |
delete quality; | |
} | |
int getLength(){ | |
return length; | |
} | |
void terminate() { | |
this->code[this->length] = '\0'; | |
this->quality[this->length] = '\0'; | |
} | |
void clear() { | |
length = 0; | |
terminate(); | |
} | |
void assign(const string& code, const string& quality) { | |
strcpy(this->code, code.c_str()); | |
strcpy(this->quality, quality.c_str()); | |
this->length = strlen(this->code); | |
} | |
void append(const Sequence& other) { | |
strncpy(this->code + this->length, other.code, other.length); | |
strncpy(this->quality + this->length, other.quality, other.length); | |
this->length += other.length; | |
terminate(); | |
} | |
void append(const Sequence& other, const int start, const int length) { | |
// if length is -1, copy up to the end of the read | |
int actual = length < 0 ? other.length - start : length; | |
strncpy(this->code + this->length, other.code, actual); | |
strncpy(this->quality + this->length, other.quality, actual); | |
this->length += actual; | |
terminate(); | |
} | |
int read(FastqInputFeed& feed) { | |
int result = -1; | |
if (!feed.done()) { | |
feed.getStream()->getline(code, MAX_READ_SIZE); | |
length = strlen(code); | |
if (!feed.done()) { | |
feed.getStream()->ignore(2, '\n'); | |
if (!feed.done()) { | |
feed.getStream()->getline(quality, MAX_READ_SIZE); | |
result = 1; | |
} | |
} | |
} | |
return result; | |
} | |
void write(FastqOutputFeed& feed) const { | |
// probably want to be safer here... | |
*(feed.getStream()) << code << "\n+\n" << quality << endl; | |
} | |
}; | |
class Barcode { | |
private: | |
public: | |
Sequence hash; | |
vector<Sequence> sequences; | |
Barcode() { | |
clear(); | |
} | |
void resize(int n) { | |
sequences.resize(n); | |
} | |
void clear() { | |
hash.clear(); | |
for(vector<Sequence>::iterator sequence = sequences.begin(); sequence != sequences.end(); sequence++) { | |
sequence->clear(); | |
} | |
} | |
void makeHash(){ | |
hash.clear(); | |
for(vector<Sequence>::iterator sequence = sequences.begin(); sequence != sequences.end(); sequence++) { | |
hash.append(*sequence); | |
} | |
} | |
void append(const Sequence& sequence, const int key, const int start, const int length) { | |
this->sequences[key].append(sequence, start, length); | |
} | |
void assign(const int key, const string& code, const string& quality) { | |
sequences[key].assign(code, quality); | |
} | |
bool match(const Barcode& other, const vector<int>& missmatches) { | |
bool result = true; | |
for(vector<Sequence>::size_type i = 0; i<sequences.size(); i++) { | |
int missmatch = 0; | |
for(int j=0; j<sequences[i].getLength(); j++) { | |
if(sequences[i].code[j] != other.sequences[i].code[j]) { | |
missmatch++; | |
if(missmatch > missmatches[i]) { | |
result = false; | |
break; | |
} | |
} | |
} | |
if(!result) { break; } | |
} | |
return result; | |
} | |
}; | |
class ReadID { | |
private: | |
int length; | |
char* value; | |
char* prefix; | |
int nibble; | |
char filtered; | |
char* flags; | |
char* barcode; | |
char* buffer; | |
public: | |
ReadID() { | |
value = new char[MAX_READ_ID_SIZE](); | |
prefix = new char[MAX_READ_ID_SIZE](); | |
flags = new char[MAX_READ_ID_SIZE](); | |
barcode = new char[MAX_READ_ID_SIZE](); | |
buffer = new char[MAX_READ_ID_SIZE](); | |
clear(); | |
} | |
void copy(const ReadID& other) { | |
clear(); | |
strncpy(this->value, other.value, other.length); | |
length = other.length; | |
} | |
void parse() { | |
/* | |
This will only work with the latest Illumina ID | |
@HWI-ST911:232:HABDFADXX:1:1101:1224:1932 1:N:0: | |
0 Instrument ID @HWI-ST911 | |
1 Run count 232 | |
2 Flowcell ID HABDFADXX | |
3 Lane number 1 | |
4 Tile 1101 | |
5 x coordinate 1224 | |
6 y coordinate 1932 | |
7 Nibble number 2 | |
8 Filtered N | |
9 Flags 0 | |
10 Barcode CGATGT | |
Prefix $0:$1:$2:$3:$4:$5:$6 | |
nibble $7 | |
filtered $8 | |
flags $9 | |
barcode $10 | |
@HWI-ST911:232:HABDFADXX:1:1101:1224:1932 1:N:0: | |
*/ | |
int offset = 0; | |
int position = 0; | |
for(int i = 0; i<length; i++) { | |
if(position == 0 && value[i] == ' '){ | |
position++; | |
this->prefix[offset] = '\0'; | |
offset = 0; | |
} else if(position > 0 && value[i] == ':') { | |
position++; | |
} | |
switch (position) { | |
case 0: | |
this->prefix[offset] = value[i]; | |
offset++; | |
break; | |
case 2: | |
this->nibble = boost::lexical_cast<int>(value[i-1]); | |
position++; | |
break; | |
case 3: | |
this->filtered = value[i]; | |
break; | |
case 4: | |
position++; | |
break; | |
case 5: | |
this->flags[offset] = value[i]; | |
offset++; | |
break; | |
case 6: | |
position++; | |
this->flags[offset] = '\0'; | |
offset = 0; | |
break; | |
case 7: | |
this->barcode[offset] = value[i]; | |
offset++; | |
break; | |
} | |
} | |
this->barcode[offset] = '\0'; | |
} | |
~ReadID() { | |
delete value; | |
delete prefix; | |
delete flags; | |
delete barcode; | |
} | |
void terminate() { | |
this->value[this->length] = '\0'; | |
} | |
void clear() { | |
length = 0; | |
this->prefix[0] = '\0'; | |
this->flags[0] = '\0'; | |
this->barcode[0] = '\0'; | |
this->buffer[0] = '\0'; | |
terminate(); | |
} | |
int read(FastqInputFeed& feed) { | |
int result = -1; | |
if (!feed.done()) { | |
feed.getStream()->getline(value, MAX_READ_ID_SIZE); | |
length = strlen(value); | |
result = 1; | |
} | |
return result; | |
} | |
void write(FastqOutputFeed& feed, const int nibble, const Barcode& barcode) const { | |
sprintf( buffer, "%s %d:%c:%s:%s", | |
prefix, | |
nibble + 1, // internally nibbles are zero based but on the read id are 1 based | |
filtered, | |
flags, | |
barcode.hash.code | |
); | |
*(feed.getStream()) << buffer << endl; | |
} | |
}; | |
class Nibble { | |
public: | |
int key; | |
ReadID readId; | |
Sequence sequence; | |
void clear() { | |
readId.clear(); | |
sequence.clear(); | |
} | |
int read(FastqInputFeed& feed) { | |
int result = readId.read(feed); | |
if(result > 0) { result = sequence.read(feed); } | |
return result; | |
} | |
void write(FastqOutputFeed& feed, const int key, const Barcode& barcode) const { | |
readId.write(feed, key, barcode); | |
sequence.write(feed); | |
} | |
}; | |
class Read { | |
public: | |
Barcode barcode; | |
vector<Nibble> input; | |
vector<Nibble> output; | |
Read() { | |
input.resize(2); | |
output.resize(2); | |
int i = 0; | |
for(vector<Nibble>::iterator nibble = input.begin(); nibble != input.end(); nibble++) { | |
nibble->key = i; | |
i++; | |
} | |
i = 0; | |
for(vector<Nibble>::iterator nibble = output.begin(); nibble != output.end(); nibble++) { | |
nibble->key = i; | |
i++; | |
} | |
} | |
void append(const int source, const int target, const int start, const int length){ | |
this->output[target].sequence.append(this->input[source].sequence, start, length); | |
} | |
void clear() { | |
barcode.clear(); | |
for(vector<Nibble>::iterator nibble = input.begin(); nibble != input.end(); nibble++) { | |
nibble->clear(); | |
} | |
for(vector<Nibble>::iterator nibble = output.begin(); nibble != output.end(); nibble++) { | |
nibble->clear(); | |
} | |
} | |
}; | |
class Fragment { | |
public: | |
int nibble; | |
int start; | |
int length; | |
}; | |
class Library { | |
private: | |
vector<FastqOutputFeed> streams; | |
public: | |
int count; | |
Barcode barcode; | |
Library() { | |
count = 0; | |
streams.resize(1); | |
} | |
void openStream(const int key, const string path) { | |
this->streams[key].key = key; | |
this->streams[key].open(path); | |
} | |
void write(const Read& read) { | |
for(vector<FastqOutputFeed>::iterator iterator = streams.begin(); iterator != streams.end(); iterator++) { | |
// Write with the library's barcode, not with the read's | |
// this will make sure read with missmatched barcodes within range | |
// get written with the imputed barcode | |
read.output[iterator->key].write(*iterator, iterator->key, this->barcode); | |
} | |
count ++; | |
} | |
void close() { | |
for(vector<FastqOutputFeed>::iterator iterator = streams.begin(); iterator != streams.end(); iterator++) { | |
iterator->close(); | |
} | |
} | |
}; | |
class Feed { | |
private: | |
int count; | |
vector<FastqInputFeed> streams; | |
public: | |
Feed() { | |
count = 0; | |
} | |
void resize(int n) { | |
streams.resize(n); | |
} | |
int getCount() { | |
return count; | |
} | |
void openStream(const int key, const string path) { | |
this->streams[key].key = key; | |
this->streams[key].open(path); | |
} | |
int read(Read& read) { | |
int result = -1; | |
for(vector<FastqInputFeed>::iterator iterator = streams.begin(); iterator != streams.end(); iterator++) { | |
result = read.input[iterator->key].read(*iterator); | |
if (result < 0) { | |
read.clear(); | |
break; | |
} | |
} | |
if (result > 0) { count++; } | |
return result; | |
} | |
void close() { | |
for(vector<FastqInputFeed>::iterator iterator = streams.begin(); iterator != streams.end(); iterator++) { | |
iterator->close(); | |
} | |
} | |
}; | |
class Configuration { | |
private: | |
void loadFragmentPattern(const vector<string> pattern) { | |
fragmentPattern.resize(pattern.size()); | |
for(vector<string>::size_type i = 0; i < pattern.size(); i++) { | |
fragmentPattern[i].resize(3); | |
vector<string> parsed(3); | |
boost::split(parsed, pattern[i], boost::is_any_of(":")); | |
try { | |
fragmentPattern[i][0] = boost::lexical_cast<int>(parsed[0]); | |
} catch(boost::bad_lexical_cast) { | |
cout << "Bad ouput reference" << endl; | |
} | |
fragmentPattern[i][1] = parsed[1].empty() ? 0 : boost::lexical_cast<int>(parsed[1]); | |
fragmentPattern[i][2] = parsed[2].empty() ? -1 : boost::lexical_cast<int>(parsed[2]); | |
} | |
} | |
void loadLibraryPattern(const vector<string> pattern) { | |
libraryPattern.resize(pattern.size()); | |
for(vector<string>::size_type i = 0; i < pattern.size(); i++) { | |
vector<string> parsed; | |
boost::split(parsed, pattern[i], boost::is_any_of(":")); | |
try { | |
for(vector<string>::iterator j = parsed.begin(); j<parsed.end(); j++){ | |
libraryPattern[i].push_back(boost::lexical_cast<int>(*j)); | |
} | |
} catch(boost::bad_lexical_cast) { | |
cout << "Bad library reference" << endl; | |
} | |
} | |
} | |
void loadBarcodePattern(const vector<string> pattern) { | |
barcodePattern.resize(pattern.size()); | |
for(vector<string>::size_type i = 0; i < pattern.size(); i++) { | |
vector<string> parsed; | |
boost::split(parsed, pattern[i], boost::is_any_of(":")); | |
try { | |
for(vector<string>::iterator j = parsed.begin(); j<parsed.end(); j++){ | |
barcodePattern[i].push_back(boost::lexical_cast<int>(*j)); | |
} | |
} catch(boost::bad_lexical_cast) { | |
cout << "Bad barcode reference" << endl; | |
} | |
} | |
} | |
void loadConfigurationFile() { | |
using boost::property_tree::ptree; | |
ptree tree; | |
read_json(path, tree); | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--input")) { | |
this->input.push_back(i.second.get_value<string>()); | |
} | |
vector<string> pattern; | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--fragment-pattern")) { | |
pattern.push_back(i.second.get_value<string>()); | |
} | |
loadFragmentPattern(pattern); | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--output")) { | |
this->output.push_back(i.second.get_value<string>()); | |
} | |
pattern.clear(); | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--library-pattern")) { | |
pattern.push_back(i.second.get_value<string>()); | |
} | |
loadLibraryPattern(pattern); | |
pattern.clear(); | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--barcode-pattern")) { | |
pattern.push_back(i.second.get_value<string>()); | |
} | |
loadBarcodePattern(pattern); | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--barcode-missmatch")) { | |
this->barcodeMissmatch.push_back(i.second.get_value<int>()); | |
} | |
BOOST_FOREACH(ptree::value_type &i, tree.get_child("--barcode-sequence")) { | |
this->barcodeSequence.push_back(i.second.get_value<string>()); | |
} | |
undetermined = tree.get<int>("--undetermined", -1); | |
} | |
public: | |
string path; | |
vector<string> input; | |
vector<string> output; | |
vector<vector<int>> fragmentPattern; | |
vector<vector<int>> libraryPattern; | |
vector<vector<int>> barcodePattern; | |
vector<string> barcodeSequence; | |
vector<int> barcodeMissmatch; | |
int undetermined; | |
int id; | |
int librarySize; | |
int libraryCount; | |
int barcodeSize; | |
bool parse(int argc, char** argv) { | |
vector<string> fragmentPattern; | |
vector<string> libraryPattern; | |
vector<string> barcodePattern; | |
namespace po = boost::program_options; | |
po::options_description description("Allowed options"); | |
description.add_options() | |
("help,h", "show this help message") | |
("conf,c", po::value<string>(&path), "configuration file") | |
("input,i", po::value< vector<string> >(&input), "input path") | |
("output,o", po::value< vector<string> >(&output), "output path") | |
("undetermined,u", po::value<int>(&undetermined), "undetermined output") | |
("id,d", po::value<int>(&id), "input to provide read id") | |
("fragment,f", po::value< vector<string> >(&fragmentPattern), "fragment pattern") | |
("library,l", po::value< vector<string> >(&libraryPattern), "library pattern") | |
("barcode,b", po::value< vector<string> >(&barcodePattern), "barcode pattern") | |
("missmatch,m", po::value< vector<int> >(&barcodeMissmatch), "allowed missmatch") | |
("sequence,s", po::value< vector<string> >(&barcodeSequence), "barcode sequence") | |
; | |
po::variables_map vm; | |
po::store(po::parse_command_line(argc, argv, description), vm); | |
po::notify(vm); | |
if (vm.count("help") || argc < 2) { | |
cout << description << "\n"; | |
return false; | |
} else { | |
if (vm.count("conf")){ | |
loadConfigurationFile(); | |
} else { | |
loadFragmentPattern(fragmentPattern); | |
loadLibraryPattern(libraryPattern); | |
loadBarcodePattern(barcodePattern); | |
} | |
barcodeSize = this->barcodePattern.size(); | |
librarySize = this->libraryPattern.size(); | |
libraryCount = (librarySize) ? (int)(this->output.size() / librarySize) : 0; | |
} | |
return true; | |
} | |
void print() { | |
cout << "Input" << endl; | |
for(vector<string>::size_type i=0; i<input.size(); i++){ | |
cout << i << " : " << input[i] << endl; | |
} | |
cout << "Output" << endl; | |
for(vector<string>::size_type i=0; i<output.size(); i++){ | |
cout << i << " : " << output[i] << endl; | |
} | |
cout << "Fragment Pattern" << endl; | |
for(vector<vector<int> >::size_type i=0; i<fragmentPattern.size(); i++){ | |
cout << fragmentPattern[i][0] << " : " << fragmentPattern[i][1] << " : " << fragmentPattern[i][2] << endl; | |
} | |
cout << "Library Pattern" << endl; | |
for(vector<vector<int>>::size_type i=0; i<libraryPattern.size(); i++) { | |
cout << i; | |
for(vector<int>::size_type j=0; j<libraryPattern[i].size(); j++) { | |
cout << " : " << libraryPattern[i][j]; | |
} | |
cout << endl; | |
} | |
cout << "Barcode Pattern" << endl; | |
for(vector<string>::size_type i=0; i<barcodePattern.size(); i++){ | |
cout << i; | |
for(vector<int>::size_type j=0; j<libraryPattern[i].size(); j++) { | |
cout << " : " << barcodePattern[i][j]; | |
} | |
cout << endl; | |
} | |
cout << "Barcode Sequence" << endl; | |
for(vector<string>::size_type i=0; i<barcodeSequence.size(); i++){ | |
cout << i << " : " << barcodeSequence[i] << endl; | |
} | |
cout << "Barcode Missmatch" << endl; | |
for(vector<int>::size_type i=0; i<barcodeMissmatch.size(); i++){ | |
cout << i << " : " << barcodeMissmatch[i] << endl; | |
} | |
cout << "Undetermined " << " : " << undetermined << endl; | |
} | |
void validate() { | |
/* | |
if (vm.count("compression")) { | |
cout << "Compression level was set to " | |
<< vm["compression"].as<int>() << ".\n"; | |
} else { | |
cout << "Compression level was not set.\n"; | |
} | |
*/ | |
} | |
}; | |
class Demultiplexer { | |
public: | |
Configuration& configuration; | |
Feed feed; | |
vector<Fragment> fragments; | |
vector<Library> libraries; | |
unordered_map<string, Library*> barcodeLookup; | |
Library* undetermined = NULL; | |
Demultiplexer(Configuration& configuration) : configuration(configuration) {} | |
void open() { | |
// Populate the feed with the input streams | |
feed.resize(configuration.input.size()); | |
for(vector<string>::size_type i=0; i<configuration.input.size(); i++) { | |
feed.openStream(i, configuration.input[i]); | |
} | |
// Populate the libraries with the output streams | |
int k = 0; | |
libraries.resize(configuration.libraryCount); | |
for(vector<Library>::iterator i = libraries.begin(); i != libraries.end(); i++) { | |
for(int j=0; j<configuration.librarySize; j++) { | |
i->openStream(j, configuration.output[k]); | |
k++; | |
} | |
} | |
// Populate the fragments | |
fragments.resize(configuration.fragmentPattern.size()); | |
for(vector<Fragment>::size_type i=0; i<fragments.size(); i++) { | |
fragments[i].nibble = configuration.fragmentPattern[i][0]; | |
fragments[i].start = configuration.fragmentPattern[i][1]; | |
fragments[i].length = configuration.fragmentPattern[i][2]; | |
} | |
// Populate the library's barcodes the identify it | |
k = 0; | |
for(vector<Library>::iterator i = libraries.begin(); i != libraries.end(); i++) { | |
i->barcode.resize(configuration.barcodeSize); | |
for(int j=0; j<configuration.barcodeSize; j++) { | |
string quality(configuration.barcodeSequence[k].length(), 'N'); | |
i->barcode.assign(j, configuration.barcodeSequence[k], quality); | |
k++; | |
} | |
i->barcode.makeHash(); | |
} | |
// For perfect matches we can hash each Library by the concatination of all its barcodes | |
for(vector< Library >::iterator i = libraries.begin(); i != libraries.end(); i++) { | |
barcodeLookup[string(i->barcode.hash.code)] = &(*i); | |
} | |
// Asign the undetermined pointer if one was specified | |
if (configuration.undetermined > 0) { | |
undetermined = &libraries[configuration.undetermined]; | |
} | |
} | |
void close() { | |
feed.close(); | |
for(vector< Library >::iterator library = libraries.begin(); library != libraries.end(); library++) { | |
library->close(); | |
} | |
} | |
void choose(const Read& read) { | |
bool matched = false; | |
char* hash = read.barcode.hash.code; | |
if(this->barcodeLookup.count(hash) > 0){ | |
Library* library = this->barcodeLookup[hash]; | |
library->write(read); | |
matched = true; | |
} else { | |
matched = chooseWithMissmatch(read); | |
if(!matched && undetermined != NULL) { | |
undetermined->write(read); | |
} | |
} | |
} | |
bool chooseWithMissmatch(const Read& read) { | |
bool matched = false; | |
for(vector<Library>::iterator iterator = libraries.begin(); iterator != libraries.end(); iterator++) { | |
if(iterator->barcode.match(read.barcode, configuration.barcodeMissmatch)) { | |
iterator->write(read); | |
matched = true; | |
break; | |
} | |
} | |
return matched; | |
} | |
void transform(Read& read) { | |
// Populate the output nibbles with the library fragments | |
for(vector< vector<int> >::size_type i = 0; i < configuration.libraryPattern.size(); i++) { | |
for(vector<int>::iterator iterator = configuration.libraryPattern[i].begin(); iterator != configuration.libraryPattern[i].end(); iterator++) { | |
read.append(fragments[*iterator].nibble, i, fragments[*iterator].start, fragments[*iterator].length); | |
} | |
} | |
// Copy the read id from the first input nibble to the output nibbles | |
for (vector<Nibble>::iterator iterator = read.output.begin(); iterator != read.output.end(); iterator++) { | |
iterator->readId.copy(read.input[0].readId); | |
// don't forget to parse the read id | |
iterator->readId.parse(); | |
} | |
// Go over the barcode fragment specifications and populate the barcode with the fragments | |
for(vector< vector<int> >::size_type i = 0; i < configuration.barcodePattern.size(); i++) { | |
for(vector<int>::iterator iterator = configuration.barcodePattern[i].begin(); iterator != configuration.barcodePattern[i].end(); iterator++) { | |
read.barcode.append(read.input[fragments[*iterator].nibble].sequence, i, fragments[*iterator].start, fragments[*iterator].length); | |
} | |
} | |
// create the hash for the library | |
read.barcode.makeHash(); | |
} | |
int next(Read& read){ | |
read.clear(); | |
int result = feed.read(read); | |
if(result > 0) { | |
this->transform(read); | |
this->choose(read); | |
// cout << feed.getCount() << endl; | |
} | |
return result; | |
} | |
void run(){ | |
Read read; | |
read.input.resize(configuration.input.size()); | |
read.output.resize(configuration.librarySize); | |
read.barcode.sequences.resize(configuration.barcodeSize); | |
int result = 0; | |
while(this->next(read) > 0) { | |
result ++; | |
} | |
} | |
void statistics() { | |
int total = 0; | |
for(vector< Library >::size_type i = 0; i < libraries.size(); i++) { | |
total += libraries[i].count; | |
cout << i << " : " << libraries[i].count << endl; | |
} | |
cout << "Total : " << total << endl; | |
} | |
}; | |
int main(int argc, char** argv) { | |
Configuration configuration; | |
if (configuration.parse(argc,argv)) { | |
configuration.print(); | |
Demultiplexer* demultiplexer = new Demultiplexer(configuration); | |
demultiplexer->open(); | |
demultiplexer->run(); | |
demultiplexer->statistics(); | |
demultiplexer->close(); | |
delete demultiplexer; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment